<tool id="cdt.managedbuild.tool.gnu.archiver.base.374652938" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.base.738159103" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.base">
<option id="gnu.cpp.compiler.option.include.paths.216814103" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/usr/include/ns3.22"/>
+ <listOptionValue builtIn="false" value="/usr/include/ns3.26"/>
<listOptionValue builtIn="false" value="/opt/gtnets/include"/>
<listOptionValue builtIn="false" value="/usr/include/lua5.3"/>
<listOptionValue builtIn="false" value="/usr/lib/jvm/java-8-openjdk-amd64/include"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.base.1053916774" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.base">
<option id="gnu.c.compiler.option.include.paths.100642505" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/usr/include/ns3.22"/>
+ <listOptionValue builtIn="false" value="/usr/include/ns3.26"/>
<listOptionValue builtIn="false" value="/opt/gtnets/include"/>
<listOptionValue builtIn="false" value="/usr/include/lua5.3"/>
<listOptionValue builtIn="false" value="/usr/lib/jvm/java-8-openjdk-amd64/include"/>
Virtual Machines
- Allow multicore VMs, along with the correct sharing computations
+ Energy
+ - New function to update the consumption of all hosts at once.
+
MSG
- The netzones are now available from the MSG API.
The old names still work, but are now deprecated.
SMPI
- New algorithm to privatize globals: dlopen, with dynamic loading tricks
- New option: smpi/keep-temps to not cleanup temp files
+ - New option: smpi/shared-malloc-blocksize. Relevant only when global shared
+   mallocs mode is used; allows changing the size of the fake file used
+   (default 1MB), to potentially limit the number of mappings for large runs.
- Support for sparse privatized malloc with SMPI_PARTIAL_SHARED_MALLOC()
+ - Fortran ifort and flang compilers support
+ - New RMA calls supported (experimental) :
+ - MPI_Win_allocate, MPI_Win_create_dynamic, MPI_Win_attach
+ - MPI_Win_detach, MPI_Win_set_info, MPI_Win_get_info
+ - MPI_Win_lock_all, MPI_Win_unlock_all, MPI_Win_flush
+ - MPI_Win_flush_local, MPI_Win_flush_all, MPI_Win_flush_local_all
+ - MPI_Op_commutative, MPI_Fetch_and_Op, MPI_Compare_and_swap
+ - MPI_Rput, MPI_Rget, MPI_Raccumulate, MPI_Rget_accumulate
XBT
- Replay: New function xbt_replay_action_get():
@code
#include <xbt/config.h>
-extern xbt_cfg_t _sg_cfg_set;
-
int main(int argc, char *argv[]) {
SD_init(&argc, argv);
/* Prefer MSG_config() if you use MSG!! */
- xbt_cfg_set_parse(_sg_cfg_set,"Item:Value");
+ xbt_cfg_set_parse("Item:Value");
// Rest of your code
}
typedef s_broadcaster_t *broadcaster_t;
-xbt_dynar_t build_hostlist_from_hostcount(int hostcount);
+xbt_dynar_t build_hostlist_from_hostcount(int hostcount);
/* Broadcaster: helper functions */
broadcaster_t broadcaster_init(xbt_dynar_t host_list, unsigned int piece_count);
#include "peer.h"
/** @addtogroup MSG_examples
- *
+ *
 * - <b>chainsend: MSG implementation of a file broadcasting system, similar to Kastafior (from Kadeploy)</b>.
*/
xbt_dynar_iterator_t xbt_dynar_iterator_new(xbt_dynar_t list, xbt_dynar_t (*criteria_fn)(int))
{
xbt_dynar_iterator_t it = xbt_new(xbt_dynar_iterator_s, 1);
-
+
it->list = list;
it->length = xbt_dynar_length(list);
it->indices_list = criteria_fn(it->length); // Creates and fills a dynar of int
{
int done = 0;
message_t msg = MSG_task_get_data(task);
-
+
XBT_DEBUG("Peer %s got message of type %d\n", peer->me, msg->type);
if (msg->type == MESSAGE_BUILD_CHAIN)
peer_init_chain(peer, msg);
void peer_print_stats(peer_t p, float elapsed_time)
{
- XBT_INFO("### %f %llu bytes (Avg %f MB/s); copy finished (simulated).", elapsed_time, p->bytes, p->bytes / 1024.0 / 1024.0 / elapsed_time);
+ XBT_INFO("### %f %llu bytes (Avg %f MB/s); copy finished (simulated).", elapsed_time, p->bytes, p->bytes / 1024.0 / 1024.0 / elapsed_time);
}
/** Peer function */
/* Main function of the Sender process */
static int sender(int argc, char *argv[])
{
- xbt_assert(argc==7, "The sender function expects 6 arguments from the XML deployment file");
+ xbt_assert(argc==7, "The sender function expects 6 arguments from the XML deployment file");
long number_of_tasks = xbt_str_parse_int(argv[1], "Invalid amount of tasks: %s"); /* - number of tasks */
double task_comp_size = xbt_str_parse_double(argv[2], "Invalid computational size: %s"); /* - computational cost */
double task_comm_size = xbt_str_parse_double(argv[3], "Invalid communication size: %s"); /* - communication cost */
char mailbox[80];
snprintf(mailbox,79, "receiver-%d", id);
-
+
MSG_process_sleep(10);
while (1) {
XBT_INFO("Wait to receive a task");
xbt_dynar_push_as(comms, msg_comm_t, comm);
XBT_INFO("Send to receiver-%ld %s comm_size %f", i % receivers_count, taskname, task_comm_size / coef);
}
-
+
/* Here we are waiting for the completion of all communications */
while (xbt_dynar_is_empty(comms) == 0) {
msg_comm_t comm;
char mailbox[80];
snprintf(mailbox,79, "receiver-%d", id);
-
+
MSG_process_sleep(10);
for (int i = 0; i < task_amount; i++) {
XBT_INFO("Wait to receive task %d", i);
{
char *mbox = bprintf("MBOX:%s-%s", MSG_host_get_name(tx_host), MSG_host_get_name(rx_host));
char **argv = NULL;
-
+
const char *pr_name_tx = "comm_tx";
argv = xbt_new(char *, 3);
argv[0] = xbt_strdup(pr_name_tx);
MSG_process_create_with_arguments(pr_name_tx, communication_tx_fun, NULL, tx_host, 2, argv);
- const char *pr_name_rx = "comm_rx";
+ const char *pr_name_rx = "comm_rx";
argv = xbt_new(char *, 3);
argv[0] = xbt_strdup(pr_name_rx);
argv[1] = xbt_strdup(mbox);
}
/* Sets a finger of the current node.
- *
+ *
* \param node the current node
* \param finger_index index of the finger to set (0 to nb_bits - 1)
* \param id the id to set for this finger
}
/* Sets the predecessor of the current node.
- *
+ *
* \param node the current node
* \param id the id to predecessor, or -1 to unset the predecessor
*/
}
/* Node main Function
- *
+ *
* Arguments:
* - my id
* - the id of a guy I know in the system (except for the first node)
* - the time to sleep before I join (except for the first node)
*/
/* This function is called when the current node receives a task.
- *
+ *
* \param node the current node
* \param task the task to handle (don't touch it afterward: it will be destroyed, reused or forwarded)
*/
}
/* Makes the current node join the ring, knowing the id of a node already in the ring
- *
+ *
* \param node the current node
* \param known_id id of a node already in the ring
* \return 1 if the join operation succeeded, 0 otherwise
}
/* Makes the current node find the successor node of an id.
- *
+ *
* \param node the current node
* \param id the id to find
* \return the id of the successor node, or -1 if the request failed
}
/* \brief Asks another node the successor node of an id.
- *
+ *
* \param node the current node
* \param ask_to the node to ask to
* \param id the id to find
}
/* Asks its predecessor to a remote node
- *
+ *
* \param node the current node
* \param ask_to the node to ask to
* \return the id of its predecessor node, or -1 if the request failed
}
/* Returns the closest preceding finger of an id with respect to the finger table of the current node.
- *
+ *
* \param node the current node
* \param id the id to find
* \return the closest preceding finger of that id
/* Try to join the ring */
case TASK_JOIN:
next = routing_next(node, task_data->answer_id);
- XBT_DEBUG("Join request from %08x forwarding to %08x", task_data->answer_id, next);
+ XBT_DEBUG("Join request from %08x forwarding to %08x", task_data->answer_id, next);
type = TASK_JOIN_LAST_REPLY;
req_data = xbt_new0(s_task_data_t,1);
req_data->answer_id = task_data->sender_id;
req_data->steps = task_data->steps + 1;
-
+
// if next different from current node forward the join
if (next!=node->id) {
get_mailbox(next, mailbox);
task_free(task_sent);
}
type = TASK_JOIN_REPLY;
- }
-
+ }
+
// send back the current node state to the joining node
req_data->type = type;
req_data->sender_id = node->id;
print_node_namespace_set(node);
int curr_namespace_set[NAMESPACE_SIZE];
int task_namespace_set[NAMESPACE_SIZE+1];
-
+
// Copy the current namespace and the task state namespace with state->id in the middle
i=0;
for (; i<NAMESPACE_SIZE/2; i++){
}
task_namespace_set[i] = task_data->state->id;
for (; i<NAMESPACE_SIZE; i++){
- curr_namespace_set[i] = node->namespace_set[i];
+ curr_namespace_set[i] = node->namespace_set[i];
task_namespace_set[i+1] = task_data->state->namespace_set[i];
}
static int node(int argc, char *argv[])
{
double init_time = MSG_get_clock();
- msg_task_t task_received = NULL;
- int join_success = 0;
+ msg_task_t task_received = NULL;
+ int join_success = 0;
double deadline;
xbt_assert(argc == 3 || argc == 5, "Wrong number of arguments for this node");
s_node_t node = {0};
int main(int argc, char *argv[])
{
MSG_init(&argc, argv);
- xbt_assert(argc > 2,
+ xbt_assert(argc > 2,
"Usage: %s [-nb_bits=n] [-timeout=t] platform_file deployment_file\n"
- "\tExample: %s ../msg_platform.xml pastry10.xml\n",
+ "\tExample: %s ../msg_platform.xml pastry10.xml\n",
argv[0], argv[0]);
char **options = &argv[1];
/** @addtogroup MSG_examples
*
- * - <b>maestro-set/maestro-set.cpp: Switch the system thread hosting our maestro</b>.
+ * - <b>maestro-set/maestro-set.cpp: Switch the system thread hosting our maestro</b>.
* That's a very advanced example in which we move the maestro thread to another process.
* Not many users need it (maybe only one, actually), but this example is also a regression test.
- *
+ *
* This example is in C++ because we use C++11 threads to ensure that the feature is working as
* expected. You can still use that feature from a C code.
*/
XBT_LOG_NEW_DEFAULT_CATEGORY(bugged1_liveness, "my log messages");
-int r=0;
+int r=0;
int cs=0;
#ifdef GARBAGE_STACK
xbt_dynar_t requests = xbt_dynar_new(sizeof(char *), NULL);
char *req;
- while(1){
+ while(1){
MSG_task_receive(&task, "coordinator");
- const char *kind = MSG_task_get_name(task);
- if (!strcmp(kind, "request")) {
+ const char *kind = MSG_task_get_name(task);
+ if (!strcmp(kind, "request")) {
req = MSG_task_get_data(task);
- if (CS_used) {
+ if (CS_used) {
XBT_INFO("CS already used. Queue the request.");
xbt_dynar_push(requests, &req);
- } else {
+ } else {
if(strcmp(req, "1") != 0){
XBT_INFO("CS idle. Grant immediatly");
answer = MSG_task_create("grant", 0, 1000, NULL);
r=0;
XBT_INFO("Propositions changed : r=0, cs=0");
}
-
+
}
return 0;
}
int predR(void);
int predCS(void);
-#endif
+#endif
const char *kind = MSG_task_get_name(task); //is it a request or a release?
if (!strcmp(kind, "request")) { // that's a request
char *req = MSG_task_get_data(task);
- if (CS_used) {
+ if (CS_used) {
XBT_INFO("CS already used.");
msg_task_t answer = MSG_task_create("not grant", 0, 1000, NULL);
MSG_task_send(answer, req);
int predCS(void);
-#endif
+#endif
/******************** Non-deterministic message ordering *********************/
/* This example implements one process which receives messages from two other */
-/* processes. There is no bug on it, it is just provided to test the soundness*/
+/* processes. There is no bug in it; it is just provided to test the soundness*/
/* of the state space reduction with DPOR, if the maximum depth (defined with */
/* --cfg=model-check/max_depth:) is reached. */
/******************************************************************************/
XBT_LOG_NEW_DEFAULT_CATEGORY(test, "Property test");
-static void test_host(const char*hostname)
+static void test_host(const char*hostname)
{
msg_host_t thehost = MSG_host_by_name(hostname);
xbt_dict_t props = MSG_host_get_properties(thehost);
msg_sem_t sem;
static int peer(int argc, char* argv[]){
- int i = 0;
+ int i = 0;
while(i < argc) {
double wait_time = xbt_str_parse_double(argv[i],"Invalid wait time: %s");
i++;
sg_host_route_latency(hosts[0], hosts[1]));
XBT_INFO("Jupiter: speed=%.0f", sg_host_speed(hosts[0])* sg_host_get_available_speed(hosts[0]));
XBT_INFO("Tremblay: speed=%.0f", sg_host_speed(hosts[1])* sg_host_get_available_speed(hosts[1]));
-
+
unsigned int ctr;
SD_task_t task;
xbt_dynar_foreach(changed_tasks, ctr, task) {
char *tracefilename;
char *last = strrchr(argv[2], '.');
tracefilename = bprintf("%.*s.trace", (int) (last == NULL ? strlen(argv[2]) : last - argv[2]),argv[2]);
- if (argc == 4)
+ if (argc == 4)
tracefilename = xbt_strdup(argv[3]);
/* Display all the tasks */
if (!dax){
XBT_ERROR("A problem occurred during DAX parsing (cycle or syntax). Do not continue this test");
free(tracefilename);
-
+
exit(255);
}
-/* Example of scatter communication, accepting a large amount of processes.
+/* Example of scatter communication, accepting a large amount of processes.
 * This is based on the experiment of Fig. 4 in http://hal.inria.fr/hal-00650233/
- * That experiment is a comparison to the LogOPSim simulator, that takes
+ * That experiment is a comparison to the LogOPSim simulator, that takes
* GOAL files as an input, thus the file name. But there is no actual link
* to the GOAL formalism beside of this.
*/
/*
if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name);
else if(nd->verified==0) fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name);
- else fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name);
+ else fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name);
*/
}
}
memcpy( &(tmpnd->inArc[ tmpnd->inDegree]), nd->inArc, nd->inDegree*sizeof( DGArc *));
tmpnd->inDegree += nd->inDegree;
- }
+ }
if ( nd->outDegree > 0 ) {
tmpnd->maxOutDegree += nd->maxOutDegree;
ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*));
memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *));
tmpnd->outDegree += nd->outDegree;
}
- free(nd);
+ free(nd);
return i;
}
nd->id = dg->numNodes;
/*************************************************************************
- * *
+ * *
* N A S P A R A L L E L B E N C H M A R K S 3.3 *
* *
* D T *
* *
- *************************************************************************
+ *************************************************************************
* *
* This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
* *
* E-mail: npb@nas.nasa.gov *
* Fax: (650) 604-3957 *
* *
- *************************************************************************
+ *************************************************************************
* *
* Author: M. Frumkin * *
* *
numPrevLayerNodes=numLayerNodes;
}
source=newNode((char*)"Source");
- AttachNode(dg,source);
+ AttachNode(dg,source);
for(i=0;i<numPrevLayerNodes;i++){
nd=dg->node[firstLayerNode+i];
ar=newArc(source,nd);
numPrevLayerNodes=numLayerNodes;
}
sink=newNode((char*)"Sink");
- AttachNode(dg,sink);
+ AttachNode(dg,sink);
for(i=0;i<numPrevLayerNodes;i++){
nd=dg->node[firstLayerNode+i];
ar=newArc(nd,sink);
int i=0,j=0,k=0;
double rms0=0.0,rms1=0.0,rmsm1=0.0;
double weight=((double) (w+1))/(w+2);
-
+
w+=1;
if(timer_on){
timer_clear(w);
int m;
int mk=16;
int nk = (int)(pow(2,mk)),
- nq=10,
+ nq=10,
np, node, no_nodes, i, ik, kk, l, k, nit, no_large_nodes, np_add, k_offset;
int verified;
char size[500]; // mind the size of the string to represent a big number
/*************************************************************************
- * *
+ * *
* N A S P A R A L L E L B E N C H M A R K S 3.3 *
- * *
- * I S *
- * *
- *************************************************************************
- * *
+ * *
+ * I S *
+ * *
+ *************************************************************************
+ * *
* This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
- * It is described in NAS Technical Report 95-020. *
- * *
- * Permission to use, copy, distribute and modify this software *
- * for any purpose with or without fee is hereby granted. We *
- * request, however, that all derived work reference the NAS *
+ * It is described in NAS Technical Report 95-020. *
+ * *
+ * Permission to use, copy, distribute and modify this software *
+ * for any purpose with or without fee is hereby granted. We *
+ * request, however, that all derived work reference the NAS *
* Parallel Benchmarks 3.3. This software is provided "as is" *
- * without express or implied warranty. *
- * *
+ * without express or implied warranty. *
+ * *
* Information on NPB 3.3, including the technical report, the *
- * original specifications, source code, results and information *
- * on how to submit new results, is available at: *
- * *
- * http://www.nas.nasa.gov/Software/NPB *
- * *
- * Send comments or suggestions to npb@nas.nasa.gov *
- * Send bug reports to npb-bugs@nas.nasa.gov *
- * *
- * NAS Parallel Benchmarks Group *
- * NASA Ames Research Center *
- * Mail Stop: T27A-1 *
- * Moffett Field, CA 94035-1000 *
- * *
- * E-mail: npb@nas.nasa.gov *
- * Fax: (650) 604-3957 *
- * *
- *************************************************************************
- * *
- * Author: M. Yarrow *
- * H. Jin *
- * *
+ * original specifications, source code, results and information *
+ * on how to submit new results, is available at: *
+ * *
+ * http://www.nas.nasa.gov/Software/NPB *
+ * *
+ * Send comments or suggestions to npb@nas.nasa.gov *
+ * Send bug reports to npb-bugs@nas.nasa.gov *
+ * *
+ * NAS Parallel Benchmarks Group *
+ * NASA Ames Research Center *
+ * Mail Stop: T27A-1 *
+ * Moffett Field, CA 94035-1000 *
+ * *
+ * E-mail: npb@nas.nasa.gov *
+ * Fax: (650) 604-3957 *
+ * *
+ *************************************************************************
+ * *
+ * Author: M. Yarrow *
+ * H. Jin *
+ * *
*************************************************************************/
#include "smpi/mpi.h"
INT_TYPE min_key_val, max_key_val;
INT_TYPE *key_buff_ptr;
-/* Iteration alteration of keys */
+/* Iteration alteration of keys */
if(gd->my_rank == 0){
gd->key_array[iteration] = iteration;
gd->key_array[iteration+MAX_ITERATIONS] = max_key - iteration;
1220703125.00 ), /* Random number gen mult */
1220703125.00 ); /* Random number gen mult */
-/* Do one interation for free (i.e., untimed) to guarantee initialization of
+/* Do one iteration for free (i.e., untimed) to guarantee initialization of
all data and code pages and respective tables */
rank(gd, 1 );
int r, cs;
int main(int argc, char **argv){
- int err, size, rank;
+ int size;
+ int rank;
int recv_buff;
MPI_Status status;
xbt_dynar_t requests = xbt_dynar_new(sizeof(int), NULL);
/* Initialize MPI */
- err = MPI_Init(&argc, &argv);
+ int err = MPI_Init(&argc, &argv);
if(err != MPI_SUCCESS){
printf("MPI initialization failed !\n");
exit(1);
int main(int argc, char **argv)
{
- int x,y, err, size, rank;
+ int x;
+ int y;
+ int size;
+ int rank;
MPI_Status status;
/* Initialize MPI */
- err = MPI_Init(&argc, &argv);
+ int err = MPI_Init(&argc, &argv);
if (err != MPI_SUCCESS) {
printf("MPI initialization failed!\n");
exit(1);
#define RELEASE_TAG 2
int main(int argc, char **argv){
- int err, size, rank;
+ int size;
+ int rank;
int recv_buff;
MPI_Status status;
int CS_used = 0;
xbt_dynar_t requests = xbt_dynar_new(sizeof(int), NULL);
/* Initialize MPI */
- err = MPI_Init(&argc, &argv);
+ int err = MPI_Init(&argc, &argv);
if(err != MPI_SUCCESS){
printf("MPI initialization failed !\n");
exit(1);
int y = 8;
int main(int argc, char **argv) {
- int recv_buff, size, rank;
+ int recv_buff;
+ int size;
+ int rank;
MPI_Status status;
MPI_Init(&argc, &argv);
int x;
int main(int argc, char **argv) {
- int recv_buff, size, rank;
+ int recv_buff;
+ int size;
+ int rank;
MPI_Status status;
MPI_Init(&argc, &argv);
int y = 0;
int main(int argc, char **argv) {
- int recv_x, recv_y, size, rank;
+ int recv_x;
+ int recv_y;
+ int size;
+ int rank;
MPI_Status status;
MPI_Init(&argc, &argv);
int x = 20;
int main(int argc, char **argv) {
- int recv_x = 1, size, rank;
+ int recv_x = 1;
+ int size;
+ int rank;
MPI_Status status;
MPI_Init(&argc, &argv);
int main(int argc, char **argv)
{
- int recv_buff, err, size, rank;
+ int recv_buff;
+ int size;
+ int rank;
MPI_Status status;
/* Initialize MPI */
- err = MPI_Init(&argc, &argv);
+ int err = MPI_Init(&argc, &argv);
if (err != MPI_SUCCESS) {
printf("MPI initialization failed!\n");
exit(1);
XBT_INFO("alltoall for rank %d", rank);
int* out=malloc(1000*size*sizeof(int));
int* in=malloc(1000*size*sizeof(int));
- MPI_Alltoall(out, 1000, MPI_INT,in, 1000, MPI_INT, MPI_COMM_WORLD);
+ MPI_Alltoall(out, 1000, MPI_INT,in, 1000, MPI_INT, MPI_COMM_WORLD);
XBT_INFO("after alltoall %d", rank);
free(out);
typedef simgrid::s4u::Actor s4u_Actor;
typedef simgrid::s4u::Host s4u_Host;
typedef simgrid::s4u::Link s4u_Link;
+typedef simgrid::s4u::File s4u_File;
typedef simgrid::s4u::Storage s4u_Storage;
typedef simgrid::s4u::NetZone s4u_NetZone;
typedef simgrid::kernel::activity::ActivityImpl* smx_activity_t;
typedef struct s4u_Actor s4u_Actor;
typedef struct s4u_Host s4u_Host;
typedef struct s4u_Link s4u_Link;
+typedef struct s4u_File s4u_File;
typedef struct s4u_Storage s4u_Storage;
typedef struct s4u_NetZone s4u_NetZone;
typedef struct kernel_Activity* smx_activity_t;
typedef s4u_Host* sg_host_t;
typedef s4u_Link* sg_link_t;
typedef s4u_Storage* sg_storage_t;
+typedef s4u_File* sg_file_t;
typedef routing_NetPoint* sg_netpoint_t;
typedef surf_Resource *sg_resource_t;
typedef msg_host_t msg_vm_t;
/* ******************************** File ************************************ */
-
-typedef struct simdata_file* simdata_file_t;
-
-typedef struct msg_file_priv {
- char *fullpath;
- sg_size_t size;
- char* mount_point;
- char* storageId;
- char* storage_type;
- int desc_id;
- void *data;
- simdata_file_t simdata;
-} s_msg_file_priv_t;
-
-typedef struct msg_file_priv* msg_file_t;
+typedef sg_file_t msg_file_t;
/* ******************************** Storage ************************************ */
SG_BEGIN_DECL()
XBT_PUBLIC(void) sg_host_energy_plugin_init();
+XBT_PUBLIC(void) sg_host_energy_update_all();
XBT_PUBLIC(double) sg_host_get_consumed_energy(sg_host_t host);
XBT_PUBLIC(double) sg_host_get_wattmin_at(sg_host_t host, int pstate);
XBT_PUBLIC(double) sg_host_get_wattmax_at(sg_host_t host, int pstate);
*/
static ActorPtr createActor(const char* name, s4u::Host* host, std::function<void()> code);
- static ActorPtr createActor(const char* name, s4u::Host* host, std::function<void(std::vector<std::string>)> code,
- std::vector<std::string> args)
+ static ActorPtr createActor(const char* name, s4u::Host* host, std::function<void(std::vector<std::string>*)> code,
+ std::vector<std::string>* args)
{
- return createActor(name, host, [code](std::vector<std::string> args) { code(args); }, args);
+ return createActor(name, host, [code](std::vector<std::string>* args) { code(args); }, args);
}
/** Create an actor using code
* This blocks the calling actor until the actor on which we call join() is terminated
*/
void join();
-
+
// Static methods on all actors:
/** Ask kindly to all actors to die. Only the issuer will survive. */
#include <xbt/base.h>
+#include "src/surf/StorageImpl.hpp"
#include <simgrid/simix.h>
namespace simgrid {
XBT_PUBLIC(msg_error_t) MSG_file_rcopy(msg_file_t fd, msg_host_t host, const char* fullpath);
XBT_PUBLIC(msg_error_t) MSG_file_rmove(msg_file_t fd, msg_host_t host, const char* fullpath);
*/
+ char* storage_type;
+ char* storageId;
+ char* mount_point;
+ int desc_id = 0;
private:
smx_file_t pimpl_ = nullptr;
* The external load (coming from an availability trace) is not taken in account.
*
* @return The number of tasks currently running on a host.
- */
+ */
public native int getLoad();
* Rendez-vous point for network communications, similar to URLs on
* which you could post and retrieve data. Actually, the mailboxes are
* not involved in the communication once it starts, but only to find
- * the contact with which you want to communicate.
+ * the contact with which you want to communicate.
* Here are some mechanisms similar to the mailbox in other
* communication systems: The phone number, which allows the caller to
* starts to flow as soon as the sender posts it, even if the receiver
* did not post its recv() yet. This can obviously lead to bad
* simulation timings, as the simulated communications do not start at
- * the exact same time than the real ones.
- *
+ * the exact same time than the real ones.
+ *
* If the simulation timings are very important to you, you can
* declare a specific receiver to a given mailbox (with the function
* setReceiver()). That way, any send() posted to that mailbox will
class Mutex;
class NetZone;
+class File;
class Storage;
XBT_PUBLIC(void) intrusive_ptr_release(Comm* c);
/** @} */
-/** @addtogroup SD_task_dependency_api
- *
+/** @addtogroup SD_task_dependency_api
+ *
* This section describes the functions for managing the dependencies between the tasks.
*
* @see SD_task_api
/********************************** File *************************************/
typedef struct s_smx_file *smx_file_t;
-/********************************** Storage *************************************/
-typedef xbt_dictelm_t smx_storage_t;
-
/* ****************************** Process *********************************** */
typedef enum {
XBT_PUBLIC(sg_size_t) simcall_file_tell(smx_file_t fd);
XBT_PUBLIC(int) simcall_file_seek(smx_file_t fd, sg_offset_t offset, int origin);
XBT_PUBLIC(int) simcall_file_move(smx_file_t fd, const char* fullpath);
-/***************************** Storage **********************************/
-XBT_PUBLIC(xbt_dict_t) simcall_storage_get_properties(smx_storage_t storage);
/************************** MC simcalls **********************************/
XBT_PUBLIC(int) simcall_mc_random(int min, int max);
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win));
MPI_CALL(XBT_PUBLIC(int), MPI_Accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win));
-MPI_CALL(XBT_PUBLIC(int), MPI_Get_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
- void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+MPI_CALL(XBT_PUBLIC(int), MPI_Get_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+ void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win));
MPI_CALL(XBT_PUBLIC(int), MPI_Rget,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win, MPI_Request* request));
MPI_CALL(XBT_PUBLIC(int), MPI_Raccumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request* request));
-MPI_CALL(XBT_PUBLIC(int), MPI_Rget_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
- void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+MPI_CALL(XBT_PUBLIC(int), MPI_Rget_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+ void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request* request));
MPI_CALL(XBT_PUBLIC(int), MPI_Fetch_and_op,( void *origin_addr, void* result_addr, MPI_Datatype datatype,
typedef void* MPI_Errhandler;
typedef void MPI_Comm_errhandler_function(MPI_Comm *, int *, ...);
-typedef int MPI_Grequest_query_function(void *extra_state, MPI_Status *status);
-typedef int MPI_Grequest_free_function(void *extra_state);
-typedef int MPI_Grequest_cancel_function(void *extra_state, int complete);
+typedef int MPI_Grequest_query_function(void *extra_state, MPI_Status *status);
+typedef int MPI_Grequest_free_function(void *extra_state);
+typedef int MPI_Grequest_cancel_function(void *extra_state, int complete);
#define MPI_DUP_FN MPI_Comm_dup
#define MPI_WIN_DUP_FN ((MPI_Win_copy_attr_function*)MPI_DUP_FN)
MPI_CALL(XBT_PUBLIC(int), MPI_Type_get_envelope,(MPI_Datatype datatype,int *num_integers,int *num_addresses,
int *num_datatypes, int *combiner));
MPI_CALL(XBT_PUBLIC(int), MPI_Type_get_contents,(MPI_Datatype datatype, int max_integers, int max_addresses,
- int max_datatypes, int* array_of_integers, MPI_Aint* array_of_addresses,
+ int max_datatypes, int* array_of_integers, MPI_Aint* array_of_addresses,
MPI_Datatype *array_of_datatypes));
MPI_CALL(XBT_PUBLIC(int), MPI_Type_create_darray,(int size, int rank, int ndims, int* array_of_gsizes,
int* array_of_distribs, int* array_of_dargs, int* array_of_psizes,
XBT_PUBLIC(int) smpi_sample_2(int global, const char *file, int line);
XBT_PUBLIC(void) smpi_sample_3(int global, const char *file, int line);
-/**
- * Need a public setter for SMPI copy_callback function, so users can define
+/**
+ * Need a public setter for SMPI copy_callback function, so users can define
* their own while still using default SIMIX_copy_callback for MSG copies.
*/
XBT_PUBLIC(void) smpi_comm_set_copy_data_callback(void (*callback) (smx_activity_t, void*, size_t));
-/**
+/**
* Functions for call location tracing. These functions will be
* called from the user's application! (With the __FILE__ and __LINE__ values
* passed as parameters.)
#define SMPI_VARGET_GLOBAL(name) name[smpi_process_index()]
-/**
+/**
* This is used for the old privatization method, i.e., on old
* machines that do not yet support privatization via mmap
*/
XBT_PUBLIC(void) xbt_automaton_transition_set_destination(xbt_automaton_transition_t t, xbt_automaton_state_t dst);
XBT_PUBLIC(xbt_dynar_t) xbt_automaton_state_get_out_transitions(xbt_automaton_state_t s);
XBT_PUBLIC(xbt_dynar_t) xbt_automaton_state_get_in_transitions(xbt_automaton_state_t s);
-XBT_PUBLIC(xbt_automaton_state_t) xbt_automaton_state_exists(xbt_automaton_t a, char *id);
+XBT_PUBLIC(xbt_automaton_state_t) xbt_automaton_state_exists(xbt_automaton_t a, char *id);
XBT_PUBLIC(void) xbt_automaton_display(xbt_automaton_t a);
XBT_PUBLIC(void) xbt_automaton_exp_label_display(xbt_automaton_exp_label_t l);
# define XBT_PUBLIC_CLASS class __declspec(dllimport)
# define XBT_PRIVATE
-#elif defined(__ELF__)
+#elif defined(__ELF__)
# define XBT_PUBLIC(type) __attribute__((visibility("default"))) type
# define XBT_EXPORT_NO_IMPORT(type) __attribute__((visibility("default"))) type
# define XBT_IMPORT_NO_EXPORT(type) __attribute__((visibility("default"))) type
* It is a coma (,) separated list of directives. They are applied from left to right.
*
* Each of them of form:
- *
+ *
* [-|+]suitename[:unitname[:testname]]
- *
- * * First char:
+ *
+ * * First char:
* if it's a '-', the directive disables something
* if it's a '+', the directive enables something
* By default, everything is enabled, but you can disable a suite and reenable some parts
/* Cleanup the mess */
XBT_PUBLIC(void) xbt_test_exit();
-/**
+/**
* @addtogroup XBT_cunit
* @brief Unit testing implementation (see @ref inside_tests_add_units)
- *
+ *
* This module is mainly intended to allow the tests of SimGrid itself and may lack the level of genericity that you
* would expect as a user. Only use it in external projects at your own risk (but it works rather well for us). We play
* with the idea of migrating to an external solution for our unit tests, possibly offering more features, but having
* absolutely no dependencies is a nice feature of SimGrid (and this code is sufficient to cover our needs, actually,
* so why should we bother switching?)
- *
+ *
* Unit testing is not intended to write integration tests.
* Please refer to \ref inside_tests_add_integration for that instead.
*
- * @{
+ * @{
*/
/** @brief Provide information about the suite declared in this file
* @hideinitializer
- *
+ *
* Actually, this macro is only used by the script extracting the test units, but that should be transparent for you.
*
* @param suite_name the short name of this suite, to be used in the --tests argument of testall afterward. Avoid
XBT_PUBLIC(void) _xbt_test_add(const char *file, int line, const char *fmt, ...) XBT_ATTRIB_PRINTF(3, 4);
XBT_PUBLIC(void) _xbt_test_fail(const char *file, int line, const char *fmt, ...) XBT_ATTRIB_PRINTF(3, 4);
XBT_PUBLIC(void) _xbt_test_log(const char *file, int line, const char *fmt, ...) XBT_ATTRIB_PRINTF(3, 4);
-/** @brief Declare that a new test begins (printf-like parameters, describing the test)
+/** @brief Declare that a new test begins (printf-like parameters, describing the test)
* @hideinitializer */
#define xbt_test_add(...) _xbt_test_add(__FILE__, __LINE__, __VA_ARGS__)
-/** @brief Declare that the lastly started test failed (printf-like parameters, describing failure cause)
+/** @brief Declare that the lastly started test failed (printf-like parameters, describing failure cause)
* @hideinitializer */
#define xbt_test_fail(...) _xbt_test_fail(__FILE__, __LINE__, __VA_ARGS__)
/** @brief The lastly started test is actually an assert
- * @hideinitializer
- *
+ * @hideinitializer
+ *
* - If provided a uniq parameter, this is assumed to be a condition that is expected to be true
* - If provided more parameters, the first one is a condition, and the other ones are printf-like arguments that are
* to be displayed when the condition fails.
* @brief DynArr are dynamically sized vector which may contain any type of variables.
*
* These are the SimGrid version of the dynamically size arrays, which all C programmer recode one day or another.
- *
+ *
* For performance concerns, the content of DynArr must be homogeneous (in contrary to dictionnaries -- see the
* \ref XBT_dict section). You thus have to provide the function which will be used to free the content at
* structure creation (of type void_f_pvoid_t).
* \until xbt_dynar_free
*
* \section XBT_dynar_exptr Example with pointed data
- *
+ *
* \skip test_dynar_string
* \skip dynar_t
* \until s2
* @{
*/
- /** @brief Quick retrieval of scalar content
+ /** @brief Quick retrieval of scalar content
* @hideinitializer */
# define xbt_dynar_get_as(dynar,idx,type) \
(*(type*)xbt_dynar_get_ptr((dynar),(idx)))
* @hideinitializer */
# define xbt_dynar_set_as(dynar,idx,type,val) \
(*(type*)xbt_dynar_set_at_ptr((dynar),(idx))) = val
- /** @brief Quick retrieval of scalar content
+ /** @brief Quick retrieval of scalar content
* @hideinitializer */
# define xbt_dynar_getlast_as(dynar,type) \
(*(type*)xbt_dynar_get_ptr((dynar),xbt_dynar_length(dynar)-1))
- /** @brief Quick retrieval of scalar content
+ /** @brief Quick retrieval of scalar content
* @hideinitializer */
# define xbt_dynar_getfirst_as(dynar,type) \
(*(type*)xbt_dynar_get_ptr((dynar),0))
- /** @brief Quick insertion of scalar content
+ /** @brief Quick insertion of scalar content
* @hideinitializer */
# define xbt_dynar_insert_at_as(dynar,idx,type,value) \
*(type*)xbt_dynar_insert_at_ptr(dynar,idx)=value
- /** @brief Quick insertion of scalar content
+ /** @brief Quick insertion of scalar content
* @hideinitializer */
# define xbt_dynar_push_as(dynar,type,value) \
*(type*)xbt_dynar_push_ptr(dynar)=value
XBT_PUBLIC(void) xbt_dynar_cursor_rm(xbt_dynar_t dynar, unsigned int *const cursor);
-/*
+/*
* \warning DO NOT USE THIS STRUCTURE DIRECTLY! Instead, use the public interface:
* This was made public to allow:
* - the inlining of the foreach elements
return TRUE;
}
-/** @brief Iterates over the whole dynar.
- *
+/** @brief Iterates over the whole dynar.
+ *
* @param _dynar what to iterate over
* @param _cursor an integer used as cursor
* @param _data
printf("Seen %s\n",str);
}
\endcode
- *
+ *
* Note that underneath, that's a simple for loop with no real black magic involved. It's perfectly safe to interrupt
* a foreach with a break or a return statement.
*/
* You are not expected to inherit from it. Instead of you use should
* @ref XBT_THROW an exception which will throw a subclass of your original
* exception with those additional features.
- *
+ *
* However, you can try `dynamic_cast` an exception to this type in order to
* get contextual information about the exception.
*/
/** \defgroup XBT_log_cats Existing log categories
* \ingroup XBT_log
- * \brief (automatically extracted)
+ * \brief (automatically extracted)
*
* This is the list of all existing log categories in SimGrid.
* This list is automatically extracted from the source code by the tools/doxygen/xbt_log_extract_hierarchy.pl utility.
*
- * It should thus contain every categories that are defined in the SimGrid library.
+ * It should thus contain every categories that are defined in the SimGrid library.
* If you want to see the one defined in your code in addition, provide `--help-logs` on the command line of your simulator.
*/
* \param desc string describing the purpose of this category
* \hideinitializer
*
- * Defines a new subcategory of the parent.
+ * Defines a new subcategory of the parent.
*/
#define XBT_LOG_NEW_SUBCATEGORY(catName, parent, desc) \
XBT_LOG_EXTERNAL_CATEGORY(parent); \
XBT_LOG_NEW_SUBCATEGORY_helper(catName, parent, desc) \
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param catName name of new category
* \param desc string describing the purpose of this category
* \hideinitializer
XBT_LOG_NEW_SUBCATEGORY_helper(catName, XBT_LOG_ROOT_CAT, desc)
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param cname name of the cat
* \hideinitializer
*
#endif
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param cname name of the cat
* \param desc string describing the purpose of this category
* \hideinitializer
XBT_LOG_DEFAULT_CATEGORY(cname)
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param cname name of the cat
* \param parent name of the parent
* \param desc string describing the purpose of this category
XBT_LOG_DEFAULT_CATEGORY(cname)
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param cname name of the cat
* \hideinitializer
*
XBT_PUBLIC(void) xbt_log_threshold_set(xbt_log_category_t cat, e_xbt_log_priority_t thresholdPriority);
/**
- * \ingroup XBT_log_implem
+ * \ingroup XBT_log_implem
* \param cat the category (not only its name, but the variable)
* \param app the appender
*
*/
XBT_PUBLIC(void) xbt_log_appender_set(xbt_log_category_t cat, xbt_log_appender_t app);
/**
- * \ingroup XBT_log_implem
+ * \ingroup XBT_log_implem
* \param cat the category (not only its name, but the variable)
* \param lay the layout
*
XBT_PUBLIC(void) xbt_log_layout_set(xbt_log_category_t cat, xbt_log_layout_t lay);
/**
- * \ingroup XBT_log_implem
+ * \ingroup XBT_log_implem
* \param cat the category (not only its name, but the variable)
* \param additivity whether logging actions must be passed to parent.
*
*/
XBT_PUBLIC(void) xbt_log_additivity_set(xbt_log_category_t cat, int additivity);
-/** @brief create a new simple layout
+/** @brief create a new simple layout
*
* This layout is not as flexible as the pattern one
*/
/* ********************** */
/**
- * \ingroup XBT_log
+ * \ingroup XBT_log
* \param catName name of the category
* \param priority minimal priority to be enabled to return true (must be #e_xbt_log_priority_t)
* \hideinitializer
/** @addtogroup XBT_mallocator
* @brief The mallocator system
- *
+ *
* This section describes the API to a mallocator.
* A mallocator allows you to recycle the objects you don't need anymore instead of freeing them. A mallocator is a
* stack which stores the unused objects or a given type. If you often need to malloc() / free() objects of a certain
SG_BEGIN_DECL()
/** \addtogroup XBT_parmap
- * \ingroup XBT_misc
+ * \ingroup XBT_misc
* \brief Parallel map.
*
* A function is applied to all elements of a dynar in parallel with n worker threads.
SG_BEGIN_DECL()
-/**
+/**
* @addtogroup XBT_swag
* @brief a O(1) set based on linked lists
- *
+ *
* Warning, this module is done to be efficient and performs tons of cast and dirty things. So make sure you know what
* you are doing while using it.
* It is basically a fifo but with restrictions so that it can be used as a set. Any operation (add, remove, belongs)
/** @defgroup XBT_swag_type Swag types
@ingroup XBT_swag
- Specific set.
+ Specific set.
These typedefs are public so that the compiler can do his job but believe me, you don't want to try to play with
those structs directly. Use them as an abstract datatype.
void *next;
void *prev;
} s_xbt_swag_hookup_t;
-/**< This type should be added to a type that is to be used in a swag.
+/**< This type should be added to a type that is to be used in a swag.
*
* Whenever a new object with this struct is created, all fields have to be set to NULL
*
/**< A typical swag */
/* @} */
-/** @defgroup XBT_swag_func SWAG functions
+/** @defgroup XBT_swag_func SWAG functions
* @ingroup XBT_swag
-
+
* @{
*/
* \defgroup XBT_swag_curs Swag cursor
* @ingroup XBT_swag
- * Iterates over the whole swag.
+ * Iterates over the whole swag.
*
* @{ */
(obj)=(decltype(obj)) xbt_swag_getNext((obj),(swag)->offset))
#endif
/**
- * @brief A safe swag iterator
+ * @brief A safe swag iterator
* @param obj the indice of the loop
* @param obj_next the object that is right after (if any) \a obj in the swag
* @param swag what to iterate over
* @hideinitializer
- You can safely modify the \a swag while using this loop.
+ You can safely modify the \a swag while using this loop.
Well, safely... Err. You can remove \a obj without having any trouble at least. */
#ifndef __cplusplus
/** @addtogroup XBT_thread
* @brief Thread portability layer
- *
+ *
* This section describes the thread portability layer. It defines types and functions very close to the pthread API,
* but it's portable to windows too.
- *
+ *
* @{
*/
SG_BEGIN_DECL()
-/** @brief get time in seconds
+/** @brief get time in seconds
*
* gives the number of seconds since the Epoch (00:00:00 UTC, January 1, 1970).
*/
JNIEXPORT void JNICALL Java_org_simgrid_msg_Host_off(JNIEnv *env, jobject jhost) {
msg_host_t host = jhost_get_native(env, jhost);
- MSG_host_off(host);
+ MSG_host_off(host);
}
JNIEXPORT jint JNICALL Java_org_simgrid_msg_Host_getCount(JNIEnv * env, jclass cls) {
env->ReleaseStringUTFChars((jstring) jname, name);
}
+JNIEXPORT void JNICALL Java_org_simgrid_msg_Host_updateAllEnergyConsumptions(JNIEnv* env, jclass cls)
+{
+ sg_host_energy_update_all();
+}
+
JNIEXPORT jdouble JNICALL Java_org_simgrid_msg_Host_getConsumedEnergy (JNIEnv *env, jobject jhost)
{
msg_host_t host = jhost_get_native(env, jhost);
JNIEXPORT jobjectArray JNICALL Java_org_simgrid_msg_Host_all(JNIEnv *env, jclass cls);
JNIEXPORT void JNICALL Java_org_simgrid_msg_Host_setAsyncMailbox(JNIEnv * env, jclass cls_arg, jobject jname);
+JNIEXPORT void JNICALL Java_org_simgrid_msg_Host_updateAllEnergyConsumptions(JNIEnv* env, jclass cls);
JNIEXPORT jdouble JNICALL Java_org_simgrid_msg_Host_getConsumedEnergy (JNIEnv *env, jobject jhost);
JNIEXPORT void JNICALL Java_org_simgrid_msg_Host_setPstate(JNIEnv* env, jobject jhost, jint pstate);
JNIEXPORT void JNICALL Java_org_simgrid_trace_Trace_hostStateDeclare(JNIEnv * env, jclass cls, jstring js)
{
const char *s = env->GetStringUTFChars(js, 0);
- TRACE_host_state_declare(s);
+ TRACE_host_state_declare(s);
env->ReleaseStringUTFChars(js, s);
}
const char *value = env->GetStringUTFChars(js_value, 0);
const char *color = env->GetStringUTFChars(js_color, 0);
- TRACE_host_state_declare_value(state, value, color);
+ TRACE_host_state_declare_value(state, value, color);
env->ReleaseStringUTFChars(js_state, state);
env->ReleaseStringUTFChars(js_value, value);
const char *state = env->GetStringUTFChars(js_state, 0);
const char *value = env->GetStringUTFChars(js_value, 0);
- TRACE_host_set_state(host, state, value);
+ TRACE_host_set_state(host, state, value);
env->ReleaseStringUTFChars(js_host, host);
env->ReleaseStringUTFChars(js_state, state);
const char *state = env->GetStringUTFChars(js_state, 0);
const char *value = env->GetStringUTFChars(js_value, 0);
- TRACE_host_push_state(host, state, value);
+ TRACE_host_push_state(host, state, value);
env->ReleaseStringUTFChars(js_host, host);
env->ReleaseStringUTFChars(js_state, state);
const char *host = env->GetStringUTFChars(js_host, 0);
const char *state = env->GetStringUTFChars(js_state, 0);
- TRACE_host_pop_state(host, state);
+ TRACE_host_pop_state(host, state);
env->ReleaseStringUTFChars(js_host, host);
env->ReleaseStringUTFChars(js_state, state);
/** This methods returns the list of storages (names) attached to an host */
public native String[] getAttachedStorage();
- /** Returns the amount of Joules consumed by that host so far */
+ /** After this call, sg_host_get_consumed_energy() will not interrupt your process
+ * (until after the next clock update).
+ */
+ public static native void updateAllEnergyConsumptions();
+ /** Returns the amount of energy (in Joules) consumed by that host so far
+ *
+ * Please note that since the consumption is lazily updated, reading it may require a simcall.
+ * As a result, the actor requesting this value will be interrupted, and the value will be
+ * updated in kernel mode before control is returned to the requesting actor.
+ */
public native double getConsumedEnergy();
/** Returns the current pstate */
/* Copyright (c) 2010-2016. The SimGrid Team.
- * All rights reserved.
+ * All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
snprintf(buff, 4, "nil");
break;
- case LUA_TNUMBER:
+ case LUA_TNUMBER:
snprintf(buff, 64, "%.3f", lua_tonumber(L, index));
break;
if (lua_istable(L, i)) {
lua_pushnil(L); /* table nil */
- //lua_next pops the topmost element from the stack and
+ //lua_next pops the topmost element from the stack and
//gets the next pair from the table
while (lua_next(L, -1)) { /* table key val */
// we need to copy here, as a cast from "Number" to "String"
* @brief Dumps a lua table with XBT_DEBUG
*
* This function can be called from within lua via "simgrid.dump(table)". It will
- * then dump the table via XBT_DEBUG
+ * then dump the table via XBT_DEBUG
*/
static int dump(lua_State* L) {
int argc = lua_gettop(L);
if (lua_istable(L, i)) {
lua_pushnil(L); /* table nil */
- //lua_next pops the topmost element from the stack and
+ //lua_next pops the topmost element from the stack and
//gets the next pair from the table at the specified index
while (lua_next(L, i)) { /* table key val */
// we need to copy here, as a cast from "Number" to "String"
SG_BEGIN_DECL()
-/********************************** Configuration of MC **************************************/
+/********************************** Configuration of MC **************************************/
extern XBT_PUBLIC(int) _sg_do_model_check;
extern XBT_PRIVATE int _sg_do_model_check_record;
extern XBT_PRIVATE int _sg_mc_checkpoint;
}
}
-/** @addtogroup SURF_lmm
- * @details
+/** @addtogroup SURF_lmm
+ * @details
* A linear maxmin solver to resolve inequations systems.
- *
+ *
* Most SimGrid model rely on a "fluid/steady-state" modeling that simulate the sharing of resources between actions at
* relatively coarse-grain. Such sharing is generally done by solving a set of linear inequations. Let's take an
* example and assume we have the variables \f$x_1\f$, \f$x_2\f$, \f$x_3\f$, and \f$x_4\f$ . Let's say that \f$x_1\f$
* This is called *max-min fairness* and is the most commonly used objective in SimGrid. Another possibility is to
* maximize \f$\sum_if(x_i)\f$, where \f$f\f$ is a strictly increasing concave function.
*
- * Constraint:
+ * Constraint:
* - bound (set)
* - shared (set)
* - usage (computed)
*
* Element:
* - value (set)
- *
+ *
* A possible system could be:
* - three variables: `var1`, `var2`, `var3`
* - two constraints: `cons1`, `cons2`
* - `elem2` linking `var2` and `cons1`
* - `elem3` linking `var2` and `cons2`
* - `elem4` linking `var3` and `cons2`
- *
+ *
* And the corresponding inequations will be:
- *
+ *
* var1.value <= var1.bound
* var2.value <= var2.bound
* var3.value <= var3.bound
* var1.weight * var1.value * elem1.value + var2.weight * var2.value * elem2.value <= cons1.bound
* var2.weight * var2.value * elem3.value + var3.weight * var3.value * elem4.value <= cons2.bound
- *
+ *
* where `var1.value`, `var2.value` and `var3.value` are the unknown values.
- *
- * If a constraint is not shared, the sum is replaced by a max.
- * For example, a third non-shared constraint `cons3` and the associated elements `elem5` and `elem6` could write as:
+ *
+ * If a constraint is not shared, the sum is replaced by a max.
+ * For example, a third non-shared constraint `cons3` and the associated elements `elem5` and `elem6` could write as:
*
* max( var1.weight * var1.value * elem5.value , var3.weight * var3.value * elem6.value ) <= cons3.bound
*
* (lmm_solve()) activates it when appropriate. It is possible that the variable is again disabled, e.g. to model the
* pausing of an action.
*
- * Concurrency limit and maximum
- *
- * We call concurrency, the number of variables that can be enabled at any time for each constraint.
+ * Concurrency limit and maximum
+ *
+ * We call concurrency, the number of variables that can be enabled at any time for each constraint.
* From a model perspective, this "concurrency" often represents the number of actions that actually compete for one
* constraint.
* The LMM solver is able to limit the concurrency for each constraint, and to monitor its maximum value.
- *
+ *
* One may want to limit the concurrency of constraints for essentially three reasons:
* - Keep LMM system in a size that can be solved (it does not react very well with tens of thousands of variables per
* constraint)
- * - Stay within parameters where the fluid model is accurate enough.
+ * - Stay within parameters where the fluid model is accurate enough.
* - Model serialization effects
*
* The concurrency limit can also be set to a negative value to disable concurrency limit. This can improve performance
* slightly.
- *
+ *
* Overall, each constraint contains three fields related to concurrency:
* - concurrency_limit which is the limit enforced by the solver
* - concurrency_current which is the current concurrency
- * - concurrency_maximum which is the observed maximum concurrency
+ * - concurrency_maximum which is the observed maximum concurrency
*
- * Variables also have one field related to concurrency: concurrency_share.
+ * Variables also have one field related to concurrency: concurrency_share.
* In effect, in some cases, one variable is involved multiple times (i.e. two elements) in a constraint.
* For example, cross-traffic is modeled using 2 elements per constraint.
* concurrency_share formally corresponds to the maximum number of elements that associate the variable and any given
XBT_PUBLIC_DATA(double) sg_maxmin_precision;
XBT_PUBLIC_DATA(double) sg_surf_precision;
XBT_PUBLIC_DATA(int) sg_concurrency_limit;
-
+
static inline void double_update(double *variable, double value, double precision)
{
//printf("Updating %g -= %g +- %g\n",*variable,value,precision);
* @brief Create a new Linear MaxMin constraint
* @param sys The system in which we add a constraint
* @param id Data associated to the constraint (e.g.: a network link)
- * @param bound_value The bound value of the constraint
+ * @param bound_value The bound value of the constraint
*/
XBT_PUBLIC(lmm_constraint_t) lmm_constraint_new(lmm_system_t sys, void *id,double bound_value);
XBT_PUBLIC(int) lmm_get_number_of_cnst_from_var(lmm_system_t sys, lmm_variable_t var);
/**
- * @brief Get a var associated to a constraint
+ * @brief Get a var associated to a constraint
* @details Get the first variable of the next variable of elem if elem is not NULL
* @param sys The system associated to the variable (not used)
* @param cnst A constraint
* @brief Get the next active constraint of a constraint in a system
* @param sys A system
* @param cnst An active constraint of the system
- *
+ *
* @return The next active constraint
*/
XBT_PUBLIC(lmm_constraint_t) lmm_get_next_active_constraint(lmm_system_t sys, lmm_constraint_t cnst);
/* prevent 0.0000 in the trace - this was the behavior before the transition to c++ */
if (event->timestamp < 1e-12)
stream << 0;
- else
+ else
stream << event->timestamp;
-}
+}
/* internal do the instrumentation module */
static void insert_into_buffer (PajeEvent* tbi)
}
type_t PJ_type_event_new (const char *name, type_t father)
-{
+{
if (name == nullptr){
THROWF (tracing_error, 0, "can't create an event type with a nullptr name");
}
typedef s_container *container_t;
class s_container {
- public:
+ public:
sg_netpoint_t netpoint;
char *name; /* Unique name of this container */
char *id; /* Unique id of this container */
//--------------------------------------------------
+class DefineVariableTypeEvent : public PajeEvent
+{
+ public:
+ type_t type;
+ DefineVariableTypeEvent(type_t type);
+ void print() override;
+};
+//--------------------------------------------------
+
+class DefineStateTypeEvent : public PajeEvent {
+ type_t type;
+ public:
+ DefineStateTypeEvent(type_t type);
+ void print() override;
+};
+
class DefineEventTypeEvent : public PajeEvent {
type_t type;
- public:
+ public:
DefineEventTypeEvent(type_t type);
void print() override;
};
public:
container_t container;
type_t type;
- double value;
+ double value;
public:
SubVariableEvent(double timestamp, container_t container, type_t type, double value);
void print() override;
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
-#include "src/internal_config.h"
+#include "src/internal_config.h"
#include "xbt/parmap.h"
/** @brief Fast context switching inspired from SystemV ucontexts.
*
- * The main difference to the System V context is that Raw Contexts are much faster because they don't
+ * The main difference to the System V context is that Raw Contexts are much faster because they don't
* preserve the signal mask when switching. This saves a system call (at least on Linux) on each context switch.
*/
class RawContext : public Context {
protected:
- void* stack_ = nullptr;
+ void* stack_ = nullptr;
/** pointer to top the stack stack */
void* stack_top_ = nullptr;
public:
int cmp = current_var.name.compare(name);
if (cmp == 0) {
-
+
// Find the whole range:
size_type first = cursor;
while (first != 0 && this->global_variables[first - 1].name == name)
size_type last = cursor;
while (last != size - 1 && this->global_variables[last + 1].name == name)
last++;
-
+
// Remove the whole range:
this->global_variables.erase(
this->global_variables.begin() + first,
this->global_variables.begin() + last + 1);
-
+
return;
} else if (cmp < 0)
first = cursor + 1;
*
* We need to efficiently find the function from any given instruction
* address inside its range. This index is sorted by low_pc
- *
+ *
* The entries are sorted by low_pc and a binary search can be used to look
* them up. In order to have a better cache locality, we only keep the
* information we need for the lookup in this vector. We could probably
XBT_TEST_UNIT("base", test_mc_page_store, "Test adding/removing pages in the store")
{
using simgrid::mc::PageStore;
-
+
xbt_test_add("Init");
std::size_t pagesize = (size_t) getpagesize();
std::unique_ptr<PageStore> store
this->refresh_malloc_info();
return this->heap_info.data();
}
-
+
void clear_cache()
{
this->cache_flags_ = Process::cache_none;
region.page_data(std::move(page_data));
return region;
}
-
+
}
}
page_numbers_.clear();
privatized_regions_.clear();
}
-
+
void flat_data(Buffer data)
{
storage_type_ = StorageType::Flat;
{
return new SafetyChecker(session);
}
-
+
}
}
static void snapshot_handle_ignore(simgrid::mc::Snapshot* snapshot)
{
xbt_assert(snapshot->process());
-
+
// Copy the memory:
for (auto const& region : mc_model_checker->process().ignored_regions()) {
s_mc_snapshot_ignored_data_t ignored_data;
xbt_die("FD snapshot not implemented in client/server mode.");
for (auto const& fd : snapshot->current_fds) {
-
+
int new_fd = open(fd.filename.c_str(), fd.flags);
if (new_fd < 0)
xbt_die("Could not reopen the file %s fo restoring the file descriptor", fd.filename.c_str());
static std::atomic_ullong counter{0};
task->counter = counter++;
task->category = nullptr;
-
+
if(MC_is_active())
MC_ignore_heap(&(task->counter), sizeof(task->counter));
*
* \include simgrid.dtd
*
- * Here is a small example of such a platform
+ * Here is a small example of such a platform
*
* \include msg/masterslave/deployment_masterslave.xml
*
/** \ingroup msg_simulation
* \brief Registers the main function of a process in a global table.
*
- * Registers a code function in a global table.
- * This table is then used by #MSG_launch_application.
+ * Registers a code function in a global table.
+ * This table is then used by #MSG_launch_application.
* \param name the reference name of the function.
* \param code the function (must have the same prototype than the main function of any C program: int ..(int argc, char *argv[]))
*/
*
* \include simgrid.dtd
*
- * Here is a small example of such a platform
+ * Here is a small example of such a platform
*
* \include small_platform.xml
*
}
catch(xbt_ex& e) {
if (e.category == cancel_error) {
- XBT_DEBUG("According to the JAVA API, a sleep call should only deal with HostFailureException, I'm lost.");
+ XBT_DEBUG("According to the JAVA API, a sleep call should only deal with HostFailureException, I'm lost.");
// adsein: MSG_TASK_CANCELED is assigned when someone kills the process that made the sleep, this is not
// correct. For instance, when the node is turned off, the error should be MSG_HOST_FAILURE, which is by the way
// and according to the JAVA document, the only exception that can be triggered by MSG_Process_sleep call.
// To avoid possible impacts in the code, I just raised a host_failure exception for the moment in the JAVA code
// and did not change anythings at the C level.
- // See comment in the jmsg_process.c file, function JNIEXPORT void JNICALL Java_org_simgrid_msg_Process_sleep(JNIEnv *env, jclass cls, jlong jmillis, jint jnanos)
+ // See comment in the jmsg_process.c file, function JNIEXPORT void JNICALL Java_org_simgrid_msg_Process_sleep(JNIEnv *env, jclass cls, jlong jmillis, jint jnanos)
status = MSG_TASK_CANCELED;
} else
throw;
/** @addtogroup m_host_management
* (#msg_host_t) and the functions for managing it.
- *
+ *
* A <em>location</em> (or <em>host</em>) is any possible place where a process may run. Thus it may be represented
* as a <em>physical resource with computing capabilities</em>, some <em>mailboxes</em> to enable running process to
* communicate with remote ones, and some <em>private data</em> that can be only accessed by local process.
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../surf/StorageImpl.hpp"
+#include "simgrid/s4u/File.hpp"
#include "simgrid/s4u/Host.hpp"
#include "simgrid/s4u/Storage.hpp"
#include "src/msg/msg_private.h"
* \see #msg_file_t
*/
-/********************************* File **************************************/
-void __MSG_file_get_info(msg_file_t fd){
-
- xbt_dynar_t info = simcall_file_get_info(fd->simdata->smx_file);
- sg_size_t *psize;
-
- fd->storage_type = xbt_dynar_pop_as(info, char*);
- fd->storageId = xbt_dynar_pop_as(info, char*);
- fd->mount_point = xbt_dynar_pop_as(info, char*);
- psize = xbt_dynar_pop_as(info, sg_size_t*);
- fd->size = *psize;
- xbt_free(psize);
- xbt_dynar_free_container(&info);
-}
-
static int MSG_host_get_file_descriptor_id(msg_host_t host)
{
simgrid::MsgHostExt* priv = host->extension<simgrid::MsgHostExt>();
*/
msg_error_t MSG_file_set_data(msg_file_t fd, void *data)
{
- fd->data = data;
+ fd->setUserdata(data);
return MSG_OK;
}
*/
void *MSG_file_get_data(msg_file_t fd)
{
- return fd->data;
+ return fd->userdata();
}
/** \ingroup msg_file
* \param fd is a the file descriptor
*/
void MSG_file_dump (msg_file_t fd){
- /* Update the cached information first */
- __MSG_file_get_info(fd);
-
XBT_INFO("File Descriptor information:\n"
"\t\tFull path: '%s'\n"
"\t\tSize: %llu\n"
"\t\tStorage Id: '%s'\n"
"\t\tStorage Type: '%s'\n"
"\t\tFile Descriptor Id: %d",
- fd->fullpath, fd->size, fd->mount_point, fd->storageId, fd->storage_type, fd->desc_id);
+ fd->path(), fd->size(), fd->mount_point, fd->storageId, fd->storage_type, fd->desc_id);
}
/** \ingroup msg_file
{
sg_size_t read_size;
- if (fd->size == 0) /* Nothing to read, return */
+ if (fd->size() == 0) /* Nothing to read, return */
return 0;
/* Find the host where the file is physically located and read it */
msg_storage_t storage_src = simgrid::s4u::Storage::byName(fd->storageId);
msg_host_t attached_host = MSG_host_by_name(storage_src->host());
- read_size = simcall_file_read(fd->simdata->smx_file, size, attached_host);
+ read_size = fd->read(size); // TODO re-add attached_host;
- if (strcmp(storage_src->host(), MSG_host_self()->cname())) {
+ if (strcmp(attached_host->cname(), MSG_host_self()->cname())) {
/* the file is hosted on a remote host, initiate a communication between src and dest hosts for data transfer */
- XBT_DEBUG("File is on %s remote host, initiate data transfer of %llu bytes.", storage_src->host(), read_size);
+ XBT_DEBUG("File is on %s remote host, initiate data transfer of %llu bytes.", attached_host->cname(), read_size);
msg_host_t m_host_list[] = {MSG_host_self(), attached_host};
double flops_amount[] = {0, 0};
double bytes_amount[] = {0, 0, static_cast<double>(read_size), 0};
msg_storage_t storage_src = simgrid::s4u::Storage::byName(fd->storageId);
msg_host_t attached_host = MSG_host_by_name(storage_src->host());
- if (strcmp(storage_src->host(), MSG_host_self()->cname())) {
+ if (strcmp(attached_host->cname(), MSG_host_self()->cname())) {
/* the file is hosted on a remote host, initiate a communication between src and dest hosts for data transfer */
- XBT_DEBUG("File is on %s remote host, initiate data transfer of %llu bytes.", storage_src->host(), size);
+ XBT_DEBUG("File is on %s remote host, initiate data transfer of %llu bytes.", attached_host->cname(), size);
msg_host_t m_host_list[] = {MSG_host_self(), attached_host};
double flops_amount[] = {0, 0};
double bytes_amount[] = {0, static_cast<double>(size), 0, 0};
}
}
/* Write file on local or remote host */
- sg_size_t offset = simcall_file_tell(fd->simdata->smx_file);
- sg_size_t write_size = simcall_file_write(fd->simdata->smx_file, size, attached_host);
- fd->size = offset + write_size;
+ // sg_size_t offset = fd->tell();
+ sg_size_t write_size = fd->write(size); // TODO readd attached_host;
return write_size;
}
*/
msg_file_t MSG_file_open(const char* fullpath, void* data)
{
- msg_file_t fd = xbt_new(s_msg_file_priv_t, 1);
- fd->data = data;
- fd->fullpath = xbt_strdup(fullpath);
- fd->simdata = xbt_new0(s_simdata_file_t, 1);
- fd->simdata->smx_file = simcall_file_open(fullpath, MSG_host_self());
+ msg_file_t fd = new simgrid::s4u::File(fullpath, MSG_host_self());
fd->desc_id = MSG_host_get_file_descriptor_id(MSG_host_self());
-
- __MSG_file_get_info(fd);
-
return fd;
}
*/
int MSG_file_close(msg_file_t fd)
{
- if (fd->data)
- xbt_free(fd->data);
-
- int res = simcall_file_close(fd->simdata->smx_file, MSG_host_self());
- MSG_host_release_file_descriptor_id(MSG_host_self(), fd->desc_id);
- __MSG_file_destroy(fd);
+ delete fd;
- return res;
+ return MSG_OK;
}
/** \ingroup msg_file
msg_error_t MSG_file_unlink(msg_file_t fd)
{
/* Find the host where the file is physically located (remote or local)*/
- msg_storage_t storage_src = simgrid::s4u::Storage::byName(fd->storageId);
- msg_host_t attached_host = MSG_host_by_name(storage_src->host());
- int res = simcall_file_unlink(fd->simdata->smx_file, attached_host);
- __MSG_file_destroy(fd);
- return static_cast<msg_error_t>(res);
+ // msg_storage_t storage_src = simgrid::s4u::Storage::byName(fd->storageId);
+ // msg_host_t attached_host = MSG_host_by_name(storage_src->host());
+ fd->unlink(); // simcall_file_unlink(fd->simdata->smx_file, attached_host);
+ return MSG_OK;
}
/** \ingroup msg_file
* \return the size of the file (as a #sg_size_t)
*/
sg_size_t MSG_file_get_size(msg_file_t fd){
- return simcall_file_get_size(fd->simdata->smx_file);
+ return fd->size();
}
/**
*/
msg_error_t MSG_file_seek(msg_file_t fd, sg_offset_t offset, int origin)
{
- return static_cast<msg_error_t>(simcall_file_seek(fd->simdata->smx_file, offset, origin));
+ fd->seek(offset); // TODO re-add origin
+ return MSG_OK;
}
/**
*/
sg_size_t MSG_file_tell(msg_file_t fd)
{
- return simcall_file_tell(fd->simdata->smx_file);
+ return fd->tell();
}
const char *MSG_file_get_name(msg_file_t fd) {
xbt_assert((fd != nullptr), "Invalid parameters");
- return fd->fullpath;
+ return fd->path();
}
/**
*/
msg_error_t MSG_file_move (msg_file_t fd, const char* fullpath)
{
- return static_cast<msg_error_t>(simcall_file_move(fd->simdata->smx_file, fullpath));
+ fd->move(fullpath);
+ return MSG_OK;
}
/**
msg_storage_t storage_src = simgrid::s4u::Storage::byName(file->storageId);
msg_host_t attached_host = MSG_host_by_name(storage_src->host());
MSG_file_seek(file, 0, SEEK_SET);
- sg_size_t read_size = simcall_file_read(file->simdata->smx_file, file->size, attached_host);
+ sg_size_t read_size = file->read(file->size());
/* Find the real host destination where the file will be physically stored */
xbt_dict_cursor_t cursor = nullptr;
return MSG_TASK_CANCELED;
}
- XBT_DEBUG("Initiate data transfer of %llu bytes between %s and %s.", read_size, storage_src->host(),
+ XBT_DEBUG("Initiate data transfer of %llu bytes between %s and %s.", read_size, attached_host->cname(),
storage_dest->host());
msg_host_t m_host_list[] = {attached_host, host_dest};
double flops_amount[] = {0, 0};
return res;
}
-/**
- * \brief Destroys a file (internal call only)
- */
-void __MSG_file_destroy(msg_file_t file)
-{
- xbt_free(file->fullpath);
- xbt_free(file->simdata);
- xbt_free(file);
-}
-
/********************************* Storage **************************************/
/** @addtogroup msg_storage_management
* (#msg_storage_t) and the functions for managing it.
bool Comm::test() {
xbt_assert(state_ == inited || state_ == started || state_ == finished);
-
- if (state_ == finished)
+
+ if (state_ == finished)
xbt_die("Don't call test on a finished comm.");
-
+
if (state_ == inited) {
this->start();
}
-
+
if(simcall_comm_test(pimpl_)){
state_ = finished;
pimpl_->unref();
timeout = timeout_time - now;
return this->wait_for(lock, timeout);
}
-
+
/**
* Notify functions
*/
void ConditionVariable::notify_one() {
simcall_cond_signal(cond_);
}
-
+
void ConditionVariable::notify_all() {
simcall_cond_broadcast(cond_);
}
#include "simgrid/s4u/File.hpp"
#include "simgrid/s4u/Host.hpp"
#include "simgrid/s4u/Mailbox.hpp"
+#include "simgrid/s4u/Storage.hpp"
XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_file,"S4U files");
-
namespace simgrid {
namespace s4u {
File::File(const char* fullpath, void* userdata) : path_(fullpath), userdata_(userdata)
{
// this cannot fail because we get a xbt_die if the mountpoint does not exist
- pimpl_ = simcall_file_open(fullpath, Host::current());
+ pimpl_ = simcall_file_open(fullpath, Host::current());
+ xbt_dynar_t info = simcall_file_get_info(pimpl_);
+ storage_type = xbt_dynar_pop_as(info, char*);
+ storageId = xbt_dynar_pop_as(info, char*);
+ mount_point = xbt_dynar_pop_as(info, char*);
+ xbt_dynar_free(&info);
}
File::~File() {
+ // Host::current()->extension<simgrid::MsgHostExt>()->file_descriptor_table->push_back(desc_id_);
simcall_file_close(pimpl_, Host::current());
}
*/
void Host::attachedStorages(std::vector<const char*>* storages)
{
- simgrid::simix::kernelImmediate([this, storages] {
- this->pimpl_->getAttachedStorageList(storages);
+ simgrid::simix::kernelImmediate([this, storages] {
+ this->pimpl_->getAttachedStorageList(storages);
});
}
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
/* Note: We specifically omit the test for yy_rule_can_match_eol because it requires
* access to the local variable yy_act. Since yyless() is a macro, it would break
- * existing scanners that call yyless() from OUTSIDE dax_lex.
+ * existing scanners that call yyless() from OUTSIDE dax_lex.
* One obvious solution it to make yy_act a global. I tried that, and saw
* a 5% performance hit in a non-dax_lineno scanner, because yy_act is
* normally declared as a register variable-- so it is not worth it.
if ( *p == '\n' )\
--dax_lineno;\
}while(0)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
/* Table of booleans, true if rule could match eol. */
static yyconst flex_int32_t yy_rule_can_match_eol[141] =
{ 0,
-0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
- 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
+0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, };
static yy_state_type yy_last_accepting_state;
* FleXML is Copyright (C) 1999-2005 Kristoffer Rose. All rights reserved.
* FleXML is Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
* (1.9.6).
- *
+ *
* There are two, intertwined parts to this program, part A and part B.
*
* Part A
* ------
- *
- * Some parts, here collectively called "Part A", are found in the
+ *
+ * Some parts, here collectively called "Part A", are found in the
* FleXML package. They are Copyright (C) 1999-2005 Kristoffer Rose
* and Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
*
* Notice that these are explicit rights granted to you for files
* generated by the FleXML system. For your rights in connection with
* the FleXML system itself please consult the GNU General Public License.
- *
+ *
* Part B
* ------
- *
- * The other parts, here collectively called "Part B", and which came
- * from the DTD used by FleXML to generate this program, can be
+ *
+ * The other parts, here collectively called "Part B", and which came
+ * from the DTD used by FleXML to generate this program, can be
* distributed (or not, as the case may be) under the terms of whoever
- * wrote them, provided these terms respect and obey the two conditions
+ * wrote them, provided these terms respect and obey the two conditions
* above under the heading "Part A".
*
* The author of and contributors to FleXML specifically disclaim
- * any copyright interest in "Part B", unless "Part B" was written
+ * any copyright interest in "Part B", unless "Part B" was written
* by the author of or contributors to FleXML.
- *
+ *
*/
/* Version strings. */
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
-
+
#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__TOS_WIN__)
# ifndef __STRICT_ANSI__
# include <io.h>
#else
# include <unistd.h>
#endif
-
+
#ifndef FLEXML_INDEXSTACKSIZE
#define FLEXML_INDEXSTACKSIZE 1000
#endif
{
ck_ilimit();
indexstack[inext++] = p;
- indexstack[inext++] = bnext;
+ indexstack[inext++] = bnext;
}
static int popbuffer(void)
#endif
#ifndef YY_NO_UNPUT
-
+
#endif
#ifndef yytext_ptr
static int yy_start_stack_ptr = 0;
static int yy_start_stack_depth = 0;
static int *yy_start_stack = NULL;
-
+
static void yy_push_state (int _new_state );
-
+
static void yy_pop_state (void );
-
+
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
#ifdef __ia64__
yy_state_type yy_current_state;
char *yy_cp, *yy_bp;
int yy_act;
-
+
if ( !(yy_init) )
{
(yy_init) = 1;
yy_size_t yyl;
for ( yyl = 0; yyl < dax_leng; ++yyl )
if ( dax_text[yyl] == '\n' )
-
+
dax_lineno++;
;
}
case 11:
/* rule 11 can match eol */
YY_RULE_SETUP
-SET(ROOT_dax__adag);
+SET(ROOT_dax__adag);
YY_BREAK
case 12:
/* rule 12 can match eol */
{
yy_state_type yy_current_state;
char *yy_cp;
-
+
yy_current_state = (yy_start);
for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
{
int c;
-
+
*(yy_c_buf_p) = (yy_hold_char);
if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
(yy_hold_char) = *++(yy_c_buf_p);
if ( c == '\n' )
-
+
dax_lineno++;
;
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void dax_restart (FILE * input_file )
{
-
+
if ( ! YY_CURRENT_BUFFER ){
dax_ensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
void dax__switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
-
+
/* TODO. We should be able to replace this entire function body
* with
* dax_pop_buffer_state();
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
YY_BUFFER_STATE dax__create_buffer (FILE * file, int size )
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) dax_alloc(sizeof( struct yy_buffer_state ) );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in dax__create_buffer()" );
/** Destroy the buffer.
* @param b a buffer created with dax__create_buffer()
- *
+ *
*/
void dax__delete_buffer (YY_BUFFER_STATE b )
{
-
+
if ( ! b )
return;
{
int oerrno = errno;
-
+
dax__flush_buffer(b );
b->yy_input_file = file;
}
b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
-
+
errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void dax__flush_buffer (YY_BUFFER_STATE b )
{
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void dax_push_buffer_state (YY_BUFFER_STATE new_buffer )
{
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void dax_pop_buffer_state (void)
{
static void dax_ensure_buffer_stack (void)
{
yy_size_t num_to_alloc;
-
+
if (!(yy_buffer_stack)) {
/* First allocation is just for 2 elements, since we don't know if this
);
if ( ! (yy_buffer_stack) )
YY_FATAL_ERROR( "out of dynamic memory in dax_ensure_buffer_stack()" );
-
+
memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
(yy_buffer_stack_max) = num_to_alloc;
(yy_buffer_stack_top) = 0;
return;
/** Setup the input buffer state to scan directly from a user-specified character buffer.
* @param base the character buffer
* @param size the size in bytes of the character buffer
- *
- * @return the newly allocated buffer state object.
+ *
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE dax__scan_buffer (char * base, yy_size_t size )
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
/** Setup the input buffer state to scan a string. The next call to dax_lex() will
* scan from a @e copy of @a str.
* @param yystr a NUL-terminated string to scan
- *
+ *
* @return the newly allocated buffer state object.
* @note If you want to scan bytes that may contain NUL values, then use
* dax__scan_bytes() instead.
*/
YY_BUFFER_STATE dax__scan_string (yyconst char * yystr )
{
-
+
return dax__scan_bytes(yystr,strlen(yystr) );
}
* scan from a @e copy of @a bytes.
* @param yybytes the byte buffer to scan
* @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- *
+ *
* @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE dax__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
char *buf;
yy_size_t n;
yy_size_t i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
buf = (char *) dax_alloc(n );
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int dax_get_lineno (void)
{
-
+
return dax_lineno;
}
/** Get the input stream.
- *
+ *
*/
FILE *dax_get_in (void)
{
}
/** Get the output stream.
- *
+ *
*/
FILE *dax_get_out (void)
{
}
/** Get the length of the current token.
- *
+ *
*/
yy_size_t dax_get_leng (void)
{
}
/** Get the current token.
- *
+ *
*/
char *dax_get_text (void)
/** Set the current line number.
* @param _line_number line number
- *
+ *
*/
void dax_set_lineno (int _line_number )
{
-
+
dax_lineno = _line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param _in_str A readable stream.
- *
+ *
* @see dax__switch_to_buffer
*/
void dax_set_in (FILE * _in_str )
/* We do not touch dax_lineno unless the option is enabled. */
dax_lineno = 1;
-
+
(yy_buffer_stack) = 0;
(yy_buffer_stack_top) = 0;
(yy_buffer_stack_max) = 0;
/* dax_lex_destroy is for both reentrant and non-reentrant scanners. */
int dax_lex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
dax__delete_buffer(YY_CURRENT_BUFFER );
#ifndef yytext_ptr
static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
{
-
+
int i;
for ( i = 0; i < n; ++i )
s1[i] = s2[i];
void *dax_realloc (void * ptr, yy_size_t size )
{
-
+
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* FleXML is Copyright (C) 1999-2005 Kristoffer Rose. All rights reserved.
* FleXML is Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
* (1.9.6).
- *
+ *
* There are two, intertwined parts to this program, part A and part B.
*
* Part A
* ------
- *
- * Some parts, here collectively called "Part A", are found in the
+ *
+ * Some parts, here collectively called "Part A", are found in the
* FleXML package. They are Copyright (C) 1999-2005 Kristoffer Rose
* and Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
*
* Notice that these are explicit rights granted to you for files
* generated by the FleXML system. For your rights in connection with
* the FleXML system itself please consult the GNU General Public License.
- *
+ *
* Part B
* ------
- *
- * The other parts, here collectively called "Part B", and which came
- * from the DTD used by FleXML to generate this program, can be
+ *
+ * The other parts, here collectively called "Part B", and which came
+ * from the DTD used by FleXML to generate this program, can be
* distributed (or not, as the case may be) under the terms of whoever
- * wrote them, provided these terms respect and obey the two conditions
+ * wrote them, provided these terms respect and obey the two conditions
* above under the heading "Part A".
*
* The author of and contributors to FleXML specifically disclaim
- * any copyright interest in "Part B", unless "Part B" was written
+ * any copyright interest in "Part B", unless "Part B" was written
* by the author of or contributors to FleXML.
- *
+ *
*/
#ifndef _FLEXML_dax_H
}
/** @brief loads a DAX file describing a DAG
- *
+ *
* See https://confluence.pegasus.isi.edu/display/pegasus/WorkflowGenerator for more details.
*/
xbt_dynar_t SD_daxload(const char *filename)
}
/** @brief loads a DOT file describing a DAG
- *
+ *
* See http://www.graphviz.org/doc/info/lang.html for more details.
* The size attribute of a node describes:
* - for a compute task: the amount of flops to execute
/** @brief create a complex data redistribution task that can then be auto-scheduled
*
- * Auto-scheduling mean that the task can be used with SD_task_schedulev().
+ * Auto-scheduling mean that the task can be used with SD_task_schedulev().
* This allows to specify the task costs at creation, and decouple them from the scheduling process where you just
* specify which resource should communicate.
*
xbt_cfg_register_string("smpi/lat-factor",
"65472:11.6436;15424:3.48845;9376:2.59299;5776:2.18796;3484:1.88101;1426:1.61075;732:1.9503;257:1.95341;0:2.01467", nullptr, "Latency factors for smpi.");
xbt_cfg_register_alias("smpi/lat-factor","smpi/lat_factor");
-
+
xbt_cfg_register_string("smpi/IB-penalty-factors", "0.965;0.925;1.35", nullptr,
"Correction factor to communications using Infiniband model with contention (default value based on Stampede cluster profiling)");
xbt_cfg_register_alias("smpi/IB-penalty-factors","smpi/IB_penalty_factors");
-
+
#if HAVE_SMPI
xbt_cfg_register_double("smpi/host-speed", 20000.0, nullptr, "Speed of the host running the simulation (in flop/s). Used to bench the operations.");
xbt_cfg_register_alias("smpi/host-speed","smpi/running_power");
}
return simgrid::simix::unmarshal<R>(self->simcall.result);
}
-
+
inline static void simcall_BODY_process_kill(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_kill(&SIMIX_process_self()->simcall, process);
return simcall<void, smx_actor_t>(SIMCALL_PROCESS_KILL, process);
}
-
+
inline static void simcall_BODY_process_killall(int reset_pid) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_killall(&SIMIX_process_self()->simcall, reset_pid);
return simcall<void, int>(SIMCALL_PROCESS_KILLALL, reset_pid);
}
-
+
inline static void simcall_BODY_process_cleanup(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_process_cleanup(process);
return simcall<void, smx_actor_t>(SIMCALL_PROCESS_CLEANUP, process);
}
-
+
inline static void simcall_BODY_process_suspend(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_suspend(&SIMIX_process_self()->simcall, process);
return simcall<void, smx_actor_t>(SIMCALL_PROCESS_SUSPEND, process);
}
-
+
inline static void simcall_BODY_process_resume(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_process_resume(process);
return simcall<void, smx_actor_t>(SIMCALL_PROCESS_RESUME, process);
}
-
+
inline static void simcall_BODY_process_set_host(smx_actor_t process, sg_host_t dest) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_set_host(&SIMIX_process_self()->simcall, process, dest);
return simcall<void, smx_actor_t, sg_host_t>(SIMCALL_PROCESS_SET_HOST, process, dest);
}
-
+
inline static int simcall_BODY_process_is_suspended(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_process_is_suspended(process);
return simcall<int, smx_actor_t>(SIMCALL_PROCESS_IS_SUSPENDED, process);
}
-
+
inline static int simcall_BODY_process_join(smx_actor_t process, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_join(&SIMIX_process_self()->simcall, process, timeout);
return simcall<int, smx_actor_t, double>(SIMCALL_PROCESS_JOIN, process, timeout);
}
-
+
inline static int simcall_BODY_process_sleep(double duration) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_sleep(&SIMIX_process_self()->simcall, duration);
return simcall<int, double>(SIMCALL_PROCESS_SLEEP, duration);
}
-
+
inline static smx_activity_t simcall_BODY_execution_start(const char* name, double flops_amount, double priority, double bound) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_execution_start(&SIMIX_process_self()->simcall, name, flops_amount, priority, bound);
return simcall<smx_activity_t, const char*, double, double, double>(SIMCALL_EXECUTION_START, name, flops_amount, priority, bound);
}
-
+
inline static smx_activity_t simcall_BODY_execution_parallel_start(const char* name, int host_nb, sg_host_t* host_list, double* flops_amount, double* bytes_amount, double amount, double rate, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_execution_parallel_start(name, host_nb, host_list, flops_amount, bytes_amount, amount, rate, timeout);
return simcall<smx_activity_t, const char*, int, sg_host_t*, double*, double*, double, double, double>(SIMCALL_EXECUTION_PARALLEL_START, name, host_nb, host_list, flops_amount, bytes_amount, amount, rate, timeout);
}
-
+
inline static void simcall_BODY_execution_cancel(smx_activity_t execution) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_execution_cancel(execution);
return simcall<void, smx_activity_t>(SIMCALL_EXECUTION_CANCEL, execution);
}
-
+
inline static void simcall_BODY_execution_set_priority(smx_activity_t execution, double priority) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_execution_set_priority(execution, priority);
return simcall<void, smx_activity_t, double>(SIMCALL_EXECUTION_SET_PRIORITY, execution, priority);
}
-
+
inline static void simcall_BODY_execution_set_bound(smx_activity_t execution, double bound) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_execution_set_bound(execution, bound);
return simcall<void, smx_activity_t, double>(SIMCALL_EXECUTION_SET_BOUND, execution, bound);
}
-
+
inline static int simcall_BODY_execution_wait(smx_activity_t execution) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_execution_wait(&SIMIX_process_self()->simcall, execution);
return simcall<int, smx_activity_t>(SIMCALL_EXECUTION_WAIT, execution);
}
-
+
inline static void simcall_BODY_process_on_exit(smx_actor_t process, int_f_pvoid_pvoid_t fun, void* data) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_process_on_exit(process, fun, data);
return simcall<void, smx_actor_t, int_f_pvoid_pvoid_t, void*>(SIMCALL_PROCESS_ON_EXIT, process, fun, data);
}
-
+
inline static void simcall_BODY_process_auto_restart_set(smx_actor_t process, int auto_restart) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_process_auto_restart_set(process, auto_restart);
return simcall<void, smx_actor_t, int>(SIMCALL_PROCESS_AUTO_RESTART_SET, process, auto_restart);
}
-
+
inline static smx_actor_t simcall_BODY_process_restart(smx_actor_t process) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_process_restart(&SIMIX_process_self()->simcall, process);
return simcall<smx_actor_t, smx_actor_t>(SIMCALL_PROCESS_RESTART, process);
}
-
+
inline static smx_activity_t simcall_BODY_comm_iprobe(smx_mailbox_t mbox, int type, int src, int tag, simix_match_func_t match_fun, void* data) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_iprobe(&SIMIX_process_self()->simcall, mbox, type, src, tag, match_fun, data);
return simcall<smx_activity_t, smx_mailbox_t, int, int, int, simix_match_func_t, void*>(SIMCALL_COMM_IPROBE, mbox, type, src, tag, match_fun, data);
}
-
+
inline static void simcall_BODY_comm_send(smx_actor_t sender, smx_mailbox_t mbox, double task_size, double rate, void* src_buff, size_t src_buff_size, simix_match_func_t match_fun, simix_copy_data_func_t copy_data_fun, void* data, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_send(&SIMIX_process_self()->simcall, sender, mbox, task_size, rate, src_buff, src_buff_size, match_fun, copy_data_fun, data, timeout);
return simcall<void, smx_actor_t, smx_mailbox_t, double, double, void*, size_t, simix_match_func_t, simix_copy_data_func_t, void*, double>(SIMCALL_COMM_SEND, sender, mbox, task_size, rate, src_buff, src_buff_size, match_fun, copy_data_fun, data, timeout);
}
-
+
inline static smx_activity_t simcall_BODY_comm_isend(smx_actor_t sender, smx_mailbox_t mbox, double task_size, double rate, void* src_buff, size_t src_buff_size, simix_match_func_t match_fun, simix_clean_func_t clean_fun, simix_copy_data_func_t copy_data_fun, void* data, int detached) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_isend(&SIMIX_process_self()->simcall, sender, mbox, task_size, rate, src_buff, src_buff_size, match_fun, clean_fun, copy_data_fun, data, detached);
return simcall<smx_activity_t, smx_actor_t, smx_mailbox_t, double, double, void*, size_t, simix_match_func_t, simix_clean_func_t, simix_copy_data_func_t, void*, int>(SIMCALL_COMM_ISEND, sender, mbox, task_size, rate, src_buff, src_buff_size, match_fun, clean_fun, copy_data_fun, data, detached);
}
-
+
inline static void simcall_BODY_comm_recv(smx_actor_t receiver, smx_mailbox_t mbox, void* dst_buff, size_t* dst_buff_size, simix_match_func_t match_fun, simix_copy_data_func_t copy_data_fun, void* data, double timeout, double rate) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_recv(&SIMIX_process_self()->simcall, receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, timeout, rate);
return simcall<void, smx_actor_t, smx_mailbox_t, void*, size_t*, simix_match_func_t, simix_copy_data_func_t, void*, double, double>(SIMCALL_COMM_RECV, receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, timeout, rate);
}
-
+
inline static smx_activity_t simcall_BODY_comm_irecv(smx_actor_t receiver, smx_mailbox_t mbox, void* dst_buff, size_t* dst_buff_size, simix_match_func_t match_fun, simix_copy_data_func_t copy_data_fun, void* data, double rate) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_irecv(&SIMIX_process_self()->simcall, receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
return simcall<smx_activity_t, smx_actor_t, smx_mailbox_t, void*, size_t*, simix_match_func_t, simix_copy_data_func_t, void*, double>(SIMCALL_COMM_IRECV, receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
}
-
+
inline static int simcall_BODY_comm_waitany(xbt_dynar_t comms, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_waitany(&SIMIX_process_self()->simcall, comms, timeout);
return simcall<int, xbt_dynar_t, double>(SIMCALL_COMM_WAITANY, comms, timeout);
}
-
+
inline static void simcall_BODY_comm_wait(smx_activity_t comm, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_wait(&SIMIX_process_self()->simcall, comm, timeout);
return simcall<void, smx_activity_t, double>(SIMCALL_COMM_WAIT, comm, timeout);
}
-
+
inline static int simcall_BODY_comm_test(smx_activity_t comm) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_test(&SIMIX_process_self()->simcall, comm);
return simcall<int, smx_activity_t>(SIMCALL_COMM_TEST, comm);
}
-
+
inline static int simcall_BODY_comm_testany(smx_activity_t* comms, size_t count) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_comm_testany(&SIMIX_process_self()->simcall, comms, count);
return simcall<int, smx_activity_t*, size_t>(SIMCALL_COMM_TESTANY, comms, count);
}
-
+
inline static smx_mutex_t simcall_BODY_mutex_init() {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_mutex_init(&SIMIX_process_self()->simcall);
return simcall<smx_mutex_t>(SIMCALL_MUTEX_INIT);
}
-
+
inline static void simcall_BODY_mutex_lock(smx_mutex_t mutex) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_mutex_lock(&SIMIX_process_self()->simcall, mutex);
return simcall<void, smx_mutex_t>(SIMCALL_MUTEX_LOCK, mutex);
}
-
+
inline static int simcall_BODY_mutex_trylock(smx_mutex_t mutex) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_mutex_trylock(&SIMIX_process_self()->simcall, mutex);
return simcall<int, smx_mutex_t>(SIMCALL_MUTEX_TRYLOCK, mutex);
}
-
+
inline static void simcall_BODY_mutex_unlock(smx_mutex_t mutex) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_mutex_unlock(&SIMIX_process_self()->simcall, mutex);
return simcall<void, smx_mutex_t>(SIMCALL_MUTEX_UNLOCK, mutex);
}
-
+
inline static smx_cond_t simcall_BODY_cond_init() {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_cond_init();
return simcall<smx_cond_t>(SIMCALL_COND_INIT);
}
-
+
inline static void simcall_BODY_cond_signal(smx_cond_t cond) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_cond_signal(cond);
return simcall<void, smx_cond_t>(SIMCALL_COND_SIGNAL, cond);
}
-
+
inline static void simcall_BODY_cond_wait(smx_cond_t cond, smx_mutex_t mutex) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_cond_wait(&SIMIX_process_self()->simcall, cond, mutex);
return simcall<void, smx_cond_t, smx_mutex_t>(SIMCALL_COND_WAIT, cond, mutex);
}
-
+
inline static void simcall_BODY_cond_wait_timeout(smx_cond_t cond, smx_mutex_t mutex, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_cond_wait_timeout(&SIMIX_process_self()->simcall, cond, mutex, timeout);
return simcall<void, smx_cond_t, smx_mutex_t, double>(SIMCALL_COND_WAIT_TIMEOUT, cond, mutex, timeout);
}
-
+
inline static void simcall_BODY_cond_broadcast(smx_cond_t cond) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_cond_broadcast(cond);
return simcall<void, smx_cond_t>(SIMCALL_COND_BROADCAST, cond);
}
-
+
inline static smx_sem_t simcall_BODY_sem_init(unsigned int capacity) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_sem_init(capacity);
return simcall<smx_sem_t, unsigned int>(SIMCALL_SEM_INIT, capacity);
}
-
+
inline static void simcall_BODY_sem_release(smx_sem_t sem) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_sem_release(&SIMIX_process_self()->simcall, sem);
return simcall<void, smx_sem_t>(SIMCALL_SEM_RELEASE, sem);
}
-
+
inline static int simcall_BODY_sem_would_block(smx_sem_t sem) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_sem_would_block(&SIMIX_process_self()->simcall, sem);
return simcall<int, smx_sem_t>(SIMCALL_SEM_WOULD_BLOCK, sem);
}
-
+
inline static void simcall_BODY_sem_acquire(smx_sem_t sem) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_sem_acquire(&SIMIX_process_self()->simcall, sem);
return simcall<void, smx_sem_t>(SIMCALL_SEM_ACQUIRE, sem);
}
-
+
inline static void simcall_BODY_sem_acquire_timeout(smx_sem_t sem, double timeout) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_sem_acquire_timeout(&SIMIX_process_self()->simcall, sem, timeout);
return simcall<void, smx_sem_t, double>(SIMCALL_SEM_ACQUIRE_TIMEOUT, sem, timeout);
}
-
+
inline static int simcall_BODY_sem_get_capacity(smx_sem_t sem) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_sem_get_capacity(&SIMIX_process_self()->simcall, sem);
return simcall<int, smx_sem_t>(SIMCALL_SEM_GET_CAPACITY, sem);
}
-
+
inline static sg_size_t simcall_BODY_file_read(smx_file_t fd, sg_size_t size, sg_host_t host) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_read(&SIMIX_process_self()->simcall, fd, size, host);
return simcall<sg_size_t, smx_file_t, sg_size_t, sg_host_t>(SIMCALL_FILE_READ, fd, size, host);
}
-
+
inline static sg_size_t simcall_BODY_file_write(smx_file_t fd, sg_size_t size, sg_host_t host) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_write(&SIMIX_process_self()->simcall, fd, size, host);
return simcall<sg_size_t, smx_file_t, sg_size_t, sg_host_t>(SIMCALL_FILE_WRITE, fd, size, host);
}
-
+
inline static smx_file_t simcall_BODY_file_open(const char* fullpath, sg_host_t host) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_open(&SIMIX_process_self()->simcall, fullpath, host);
return simcall<smx_file_t, const char*, sg_host_t>(SIMCALL_FILE_OPEN, fullpath, host);
}
-
+
inline static int simcall_BODY_file_close(smx_file_t fd, sg_host_t host) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_close(&SIMIX_process_self()->simcall, fd, host);
return simcall<int, smx_file_t, sg_host_t>(SIMCALL_FILE_CLOSE, fd, host);
}
-
+
inline static int simcall_BODY_file_unlink(smx_file_t fd, sg_host_t host) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_file_unlink(fd, host);
return simcall<int, smx_file_t, sg_host_t>(SIMCALL_FILE_UNLINK, fd, host);
}
-
+
inline static sg_size_t simcall_BODY_file_get_size(smx_file_t fd) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_get_size(&SIMIX_process_self()->simcall, fd);
return simcall<sg_size_t, smx_file_t>(SIMCALL_FILE_GET_SIZE, fd);
}
-
+
inline static sg_size_t simcall_BODY_file_tell(smx_file_t fd) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_tell(&SIMIX_process_self()->simcall, fd);
return simcall<sg_size_t, smx_file_t>(SIMCALL_FILE_TELL, fd);
}
-
+
inline static int simcall_BODY_file_seek(smx_file_t fd, sg_offset_t offset, int origin) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_seek(&SIMIX_process_self()->simcall, fd, offset, origin);
return simcall<int, smx_file_t, sg_offset_t, int>(SIMCALL_FILE_SEEK, fd, offset, origin);
}
-
+
inline static xbt_dynar_t simcall_BODY_file_get_info(smx_file_t fd) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_get_info(&SIMIX_process_self()->simcall, fd);
return simcall<xbt_dynar_t, smx_file_t>(SIMCALL_FILE_GET_INFO, fd);
}
-
+
inline static int simcall_BODY_file_move(smx_file_t fd, const char* fullpath) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_file_move(&SIMIX_process_self()->simcall, fd, fullpath);
return simcall<int, smx_file_t, const char*>(SIMCALL_FILE_MOVE, fd, fullpath);
}
-
+
inline static int simcall_BODY_mc_random(int min, int max) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) simcall_HANDLER_mc_random(&SIMIX_process_self()->simcall, min, max);
return simcall<int, int, int>(SIMCALL_MC_RANDOM, min, max);
}
-
+
inline static void simcall_BODY_set_category(smx_activity_t synchro, const char* category) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_set_category(synchro, category);
return simcall<void, smx_activity_t, const char*>(SIMCALL_SET_CATEGORY, synchro, category);
}
-
+
inline static void simcall_BODY_run_kernel(std::function<void()> const* code) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_run_kernel(code);
return simcall<void, std::function<void()> const*>(SIMCALL_RUN_KERNEL, code);
}
-
+
inline static void simcall_BODY_run_blocking(std::function<void()> const* code) {
/* Go to that function to follow the code flow through the simcall barrier */
if (0) SIMIX_run_blocking(code);
/** @private
* @brief (in kernel mode) unpack the simcall and activate the handler
- *
+ *
* This function is generated from src/simix/simcalls.in
*/
void SIMIX_simcall_handle(smx_simcall_t simcall, int value) {
#include <sys/mman.h>
#endif
-#ifdef __MINGW32__
-#define _aligned_malloc __mingw_aligned_malloc
-#define _aligned_free __mingw_aligned_free
-#endif //MINGW
+#ifdef __MINGW32__
+#define _aligned_malloc __mingw_aligned_malloc
+#define _aligned_free __mingw_aligned_free
+#endif /*MINGW*/
#if HAVE_VALGRIND_H
# include <valgrind/valgrind.h>
* \param nb_threads the number of threads to use
*/
void SIMIX_context_set_nthreads(int nb_threads) {
- if (nb_threads<=0) {
+ if (nb_threads<=0) {
nb_threads = xbt_os_get_numcores();
XBT_INFO("Auto-setting contexts/nthreads to %d",nb_threads);
}
#include "src/mc/remote/Client.hpp"
#include "src/mc/remote/mc_protocol.h"
#include <stdlib.h>
-#endif
+#endif
#include "src/mc/mc_record.h"
XBT_PRIVATE int SIMIX_file_seek(smx_actor_t process, smx_file_t fd, sg_offset_t offset, int origin);
XBT_PRIVATE int SIMIX_file_move(smx_actor_t process, smx_file_t fd, const char* fullpath);
-XBT_PRIVATE xbt_dict_t SIMIX_storage_get_properties(smx_storage_t storage);
-
XBT_PRIVATE void SIMIX_io_destroy(smx_activity_t synchro);
XBT_PRIVATE void SIMIX_io_finish(smx_activity_t synchro);
} else{
this_comm = new simgrid::kernel::activity::CommImpl(SIMIX_COMM_RECEIVE);
smx_type = SIMIX_COMM_SEND;
- }
+ }
smx_activity_t other_synchro=nullptr;
if (mbox->permanent_receiver != nullptr && not mbox->done_comm_queue.empty()) {
XBT_DEBUG("first check in the permanent recv mailbox, to see if we already got something");
if (MC_is_active() || MC_record_replay_is_active()){
if (timeout > 0.0)
- xbt_die("Timeout not implemented for waitany in the model-checker");
+ xbt_die("Timeout not implemented for waitany in the model-checker");
int idx = SIMCALL_GET_MC_VALUE(simcall);
synchro = xbt_dynar_get_as(synchros, idx, smx_activity_t);
synchro->simcalls.push_back(simcall);
SIMIX_comm_finish(synchro);
return;
}
-
+
if (timeout < 0.0){
simcall->timer = NULL;
} else {
SIMIX_simcall_answer(simcall);
});
}
-
+
xbt_dynar_foreach(synchros, cursor, synchro){
/* associate this simcall to the the synchro */
synchro->simcalls.push_back(simcall);
_smx_throw_issuer->exception = std::make_exception_ptr(e); \
} else ((void)0)
-/* ******************************** File ************************************ */
typedef struct s_smx_file {
surf_file_t surf_file;
- void* data; /**< @brief user data */
} s_smx_file_t;
/** Unlock a mutex for a process
*
- * Unlocks the mutex and gives it to a process waiting for it.
+ * Unlocks the mutex and gives it to a process waiting for it.
* If the unlocker is not the owner of the mutex nothing happens.
* If there are no process waiting, it sets the mutex as free.
*/
synchro = SIMIX_synchro_wait(issuer->host, timeout);
synchro->simcalls.push_front(simcall);
issuer->waiting_synchro = synchro;
- xbt_swag_insert(simcall->issuer, cond->sleeping);
+ xbt_swag_insert(simcall->issuer, cond->sleeping);
XBT_OUT();
}
/**
* \brief Signalizes a condition.
*
- * Signalizes a condition and wakes up a sleeping process.
+ * Signalizes a condition and wakes up a sleeping process.
* If there are no process sleeping, no action is done.
* \param cond A condition
*/
XBT_DEBUG("Signal condition %p", cond);
- /* If there are processes waiting for the condition choose one and try
+ /* If there are processes waiting for the condition choose one and try
to make it acquire the mutex */
if ((proc = (smx_actor_t) xbt_swag_extract(cond->sleeping))) {
void simcall_HANDLER_sem_acquire_timeout(smx_simcall_t simcall, smx_sem_t sem, double timeout)
{
XBT_IN("(%p)",simcall);
- _SIMIX_sem_wait(sem, timeout, simcall->issuer, simcall);
+ _SIMIX_sem_wait(sem, timeout, simcall->issuer, simcall);
XBT_OUT();
}
int simcall_HANDLER_sem_would_block(smx_simcall_t simcall, smx_sem_t sem) {
* comm: communication
* Descrp: Function realizes the allgather operation using the 2dmesh
* algorithm. Allgather ommunication occurs first in the x dimension then in
- * the y dimension. The communication in each dimension follows
+ * the y dimension. The communication in each dimension follows
* "simple"
* Auther: Ahmad Faraj
****************************************************************************/
req_ptr = req;
- // do local allgather/local copy
+ // do local allgather/local copy
recv_offset = rank * block_size;
Datatype::copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
recv_count, recv_type);
req_ptr = req;
- // do local allgather/local copy
+ // do local allgather/local copy
recv_offset = rank * block_size;
Datatype::copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
recv_count, recv_type);
- // do rowwise comm
+ // do rowwise comm
for (i = 0; i < Y; i++) {
src = i + my_row_base;
if (src == rank)
// irregular case use default MPI fucntions
if (scount * sextent != rcount * rextent) {
- XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather.");
+ XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather.");
Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
// topo non-specific
// irregular case use default MPI fucntions
if (scount * sextent != rcount * rextent) {
- XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather.");
+ XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather.");
Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
// topo non-specific
if (comm_size <= num_core) {
XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather.");
Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
// the last SMP node may have fewer number of running processes than all others
}
- // INTER-SMP-ALLGATHER
+ // INTER-SMP-ALLGATHER
// Every root of each SMP node post INTER-Sendrecv, then do INTRA-Bcast for each receiving message
// Use logical ring algorithm
* algorithm.
* Auther: MPICH
* Comment: Original bruck algorithm from MPICH is slightly modified by
- * Ahmad Faraj.
+ * Ahmad Faraj.
****************************************************************************/
namespace simgrid{
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- // get size of the communicator, followed by rank
+ // get size of the communicator, followed by rank
num_procs = comm->size();
rank = comm->rank();
{
int rank, size;
int local_rank, local_size;
- int leader_comm_size = 0;
+ int leader_comm_size = 0;
int mpi_errno = MPI_SUCCESS;
MPI_Aint recvtype_extent = 0; /* Datatype extent */
MPI_Comm shmem_comm, leader_comm;
if (not comm->is_uniform() || not comm->is_blocked())
THROWF(arg_error,0, "allgather MVAPICH2 smp algorithm can't be used with irregular deployment. Please insure that processes deployed on the same node are contiguous and that each node has the same number of processes");
-
+
if (recvcnt == 0) {
return MPI_SUCCESS;
}
/* extract the rank,size information for the intra-node communicator */
recvtype_extent=recvtype->get_extent();
-
+
shmem_comm = comm->get_intra_comm();
local_rank = shmem_comm->rank();
local_size = shmem_comm->size();
/*If there is just one node, after gather itself,
* root has all the data and it can do bcast*/
if(local_rank == 0) {
- mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
- (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
+ mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
+ (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
recvcnt, recvtype,
0, shmem_comm);
} else {
- /*Since in allgather all the processes could have
+ /*Since in allgather all the processes could have
* its own data in place*/
if(sendbuf == MPI_IN_PLACE) {
- mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
- recvcnt , recvtype,
+ mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
+ recvcnt , recvtype,
recvbuf, recvcnt, recvtype,
0, shmem_comm);
} else {
- mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
+ mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
recvbuf, recvcnt, recvtype,
0, shmem_comm);
}
mpi_errno = Colls::allgatherv(sendbuf,
(recvcnt*local_size),
- recvtype,
+ recvtype,
recvbuf, recvcnts,
displs, recvtype,
leader_comm);
xbt_free(recvcnts);
} else {
void* sendtmpbuf=((char*)recvbuf)+recvtype->get_extent()*(recvcnt*local_size)*leader_comm->rank();
-
-
- mpi_errno = Coll_allgather_mpich::allgather(sendtmpbuf,
+
+
+ mpi_errno = Coll_allgather_mpich::allgather(sendtmpbuf,
(recvcnt*local_size),
recvtype,
recvbuf, (recvcnt*local_size), recvtype,
* Returns: MPI_SUCCESS or error code
*
* Description: Neighbor Exchange algorithm for allgather.
- * Described by Chen et.al. in
- * "Performance Evaluation of Allgather Algorithms on
+ * Described by Chen et.al. in
+ * "Performance Evaluation of Allgather Algorithms on
* Terascale Linux Cluster with Fast Ethernet",
- * Proceedings of the Eighth International Conference on
+ * Proceedings of the Eighth International Conference on
* High-Performance Computing inn Asia-Pacific Region
* (HPCASIA'05), 2005
- *
+ *
* Rank r exchanges message with one of its neighbors and
* forwards the data further in the next step.
*
* No additional memory requirements.
- *
+ *
* Limitations: Algorithm works only on even number of processes.
* For odd number of processes we switch to ring algorithm.
- *
+ *
* Example on 6 nodes:
* Initial state
* # 0 1 2 3 4 5
* [4] [4] [4] [4] [4] [4]
* [5] [5] [5] [5] [5] [5]
*/
-
+
#include "../colls_private.h"
namespace simgrid{
namespace smpi{
-int
+int
Coll_allgather_ompi_neighborexchange::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
if (size % 2) {
XBT_DEBUG(
- "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
+ "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size);
return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
Datatype::copy (tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
- }
+ }
/* Determine neighbors, order in which blocks will arrive, etc. */
even_rank = not(rank % 2);
/* Communication loop:
- First step is special: exchange a single block with neighbor[0].
- - Rest of the steps:
- update recv_data_from according to offset, and
+ - Rest of the steps:
+ update recv_data_from according to offset, and
exchange two blocks with appropriate neighbor.
the send location becomes previous receve location.
*/
for (i = 1; i < (size / 2); i++) {
const int i_parity = i % 2;
- recv_data_from[i_parity] =
+ recv_data_from[i_parity] =
(recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
tmpsend = (char*)rbuf + send_data_from * rcount * rext;
-
+
/* Sendreceive */
- Request::sendrecv(tmpsend, 2 * rcount, rdtype,
- neighbor[i_parity],
+ Request::sendrecv(tmpsend, 2 * rcount, rdtype,
+ neighbor[i_parity],
COLL_TAG_ALLGATHER,
tmprecv, 2 * rcount, rdtype,
neighbor[i_parity],
char *send_ptr = (char *) sbuf;
char *recv_ptr = (char *) rbuf;
- // get size of the communicator, followed by rank
+ // get size of the communicator, followed by rank
unsigned int num_procs = comm->size();
unsigned int rank = comm->rank();
unsigned int mask;
int curr_count;
- // get size of the communicator, followed by rank
+ // get size of the communicator, followed by rank
unsigned int num_procs = comm->size();
if((num_procs&(num_procs-1)))
recv_chunk = r_extent * recv_count;
if (send_chunk != recv_chunk) {
- XBT_WARN("MPI_allgather_rhv use default MPI_allgather.");
+ XBT_WARN("MPI_allgather_rhv use default MPI_allgather.");
Coll_allgather_default::allgather(sbuf, send_count, send_type, rbuf, recv_count,
recv_type, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
// compute starting offset location to perform local copy
}
- // INTER-SMP-ALLGATHER
+ // INTER-SMP-ALLGATHER
// Every root of each SMP node post INTER-Sendrecv, then do INTRA-Bcast for each receiving message
src = ((inter_rank - i + inter_comm_size) % inter_comm_size) * num_core;
//send_offset = (rank * sextent * scount);
recv_offset = (src * sextent * scount);
- // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
+ // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
// (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status);
//MPIC_Isend((recv_buf+send_offset), (scount * num_core), stype, dst, tag, comm, req_ptr++);
*(req_ptr++) = Request::irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype,
//src = ((inter_rank-i+inter_comm_size)%inter_comm_size) * num_core;
send_offset = (rank * sextent * scount);
//recv_offset = (src * sextent * scount);
- // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
+ // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
// (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status);
*(req_ptr++) = Request::isend(((char *) recv_buf + send_offset), (scount * num_core), stype,
dst, tag, comm);
* in Multiport Message-Passing Systems"
* Note: Unlike in case of allgather implementation, we relay on
* indexed datatype to select buffers appropriately.
- * The only additional memory requirement is for creation of
+ * The only additional memory requirement is for creation of
* temporary datatypes.
* Example on 7 nodes (memory lay out need not be in-order)
* Initial set up:
* [ ] [ ] [ ] [ ] [5] [5] [ ]
* [ ] [ ] [ ] [ ] [ ] [6] [6]
* Step 1: send message to (rank - 2^1), receive message from (rank + 2^1).
- * message contains all blocks from (rank) .. (rank + 2^2) with
+ * message contains all blocks from (rank) .. (rank + 2^2) with
* wrap around.
* # 0 1 2 3 4 5 6
* [0] [ ] [ ] [ ] [0] [0] [0]
* [ ] [ ] [5] [5] [5] [5] [ ]
* [ ] [ ] [ ] [6] [6] [6] [6]
* Step 2: send message to (rank - 2^2), receive message from (rank + 2^2).
- * message size is "all remaining blocks"
+ * message size is "all remaining blocks"
* # 0 1 2 3 4 5 6
* [0] [0] [0] [0] [0] [0] [0]
* [1] [1] [1] [1] [1] [1] [1]
int Coll_allgatherv_ompi_bruck::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int *rcounts,
- int *rdispls,
+ int *rdispls,
MPI_Datatype rdtype,
MPI_Comm comm)
{
XBT_DEBUG(
"coll:tuned:allgather_ompi_bruck rank %d", rank);
-
+
sdtype->extent(&slb, &sext);
rdtype->extent(&rlb, &rext);
/* Initialization step:
- - if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of
+ - if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of
the receive buffer.
*/
tmprecv = (char*) rbuf + rdispls[rank] * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
- Datatype::copy(tmpsend, scount, sdtype,
+ Datatype::copy(tmpsend, scount, sdtype,
tmprecv, rcounts[rank], rdtype);
}
-
+
/* Communication step:
At every step i, rank r:
- doubles the distance
- sends message with blockcount blocks, (rbuf[rank] .. rbuf[rank + 2^i])
to rank (r - distance)
- - receives message of blockcount blocks,
- (rbuf[r + distance] ... rbuf[(r+distance) + 2^i]) from
+ - receives message of blockcount blocks,
+ (rbuf[r + distance] ... rbuf[(r+distance) + 2^i]) from
rank (r + distance)
- - blockcount doubles until the last step when only the remaining data is
+ - blockcount doubles until the last step when only the remaining data is
exchanged.
*/
new_rcounts = (int*) calloc(4*size, sizeof(int));
if (distance <= (size >> 1)) {
blockcount = distance;
- } else {
+ } else {
blockcount = size - distance;
}
new_rcounts[i] = rcounts[tmp_rrank];
new_rdispls[i] = rdispls[tmp_rrank];
}
- Datatype::create_indexed(blockcount, new_scounts, new_sdispls,
+ Datatype::create_indexed(blockcount, new_scounts, new_sdispls,
rdtype, &new_sdtype);
Datatype::create_indexed(blockcount, new_rcounts, new_rdispls,
rdtype, &new_rdtype);
* Accepts: Same arguments as MPI_Allgatherv
* Returns: MPI_SUCCESS or error code
*
- * Description: Neighbor Exchange algorithm for allgather adapted for
+ * Description: Neighbor Exchange algorithm for allgather adapted for
* allgatherv.
- * Described by Chen et.al. in
- * "Performance Evaluation of Allgather Algorithms on
+ * Described by Chen et.al. in
+ * "Performance Evaluation of Allgather Algorithms on
* Terascale Linux Cluster with Fast Ethernet",
- * Proceedings of the Eighth International Conference on
+ * Proceedings of the Eighth International Conference on
* High-Performance Computing inn Asia-Pacific Region
* (HPCASIA'05), 2005
- *
+ *
* Rank r exchanges message with one of its neighbors and
* forwards the data further in the next step.
*
* No additional memory requirements.
- *
+ *
* Limitations: Algorithm works only on even number of processes.
* For odd number of processes we switch to ring algorithm.
- *
+ *
* Example on 6 nodes:
* Initial state
* # 0 1 2 3 4 5
* [4] [4] [4] [4] [4] [4]
* [5] [5] [5] [5] [5] [5]
*/
-
+
#include "../colls_private.h"
-
+
namespace simgrid{
namespace smpi{
-int
+int
Coll_allgatherv_ompi_neighborexchange::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int *rcounts, int *rdispls,
int line = -1;
int rank, size;
int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
-
+
int i, even_rank;
int err = 0;
ptrdiff_t slb, rlb, sext, rext;
if (size % 2) {
XBT_DEBUG(
- "coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm",
+ "coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size);
return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts,
+ rbuf, rcounts,
rdispls, rdtype,
comm);
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* Initialization step:
- - if send buffer is not MPI_IN_PLACE, copy send buffer to
+ - if send buffer is not MPI_IN_PLACE, copy send buffer to
the appropriate block of receive buffer
*/
tmprecv = (char*) rbuf + rdispls[rank] * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
- err = Datatype::copy(tmpsend, scount, sdtype,
+ err = Datatype::copy(tmpsend, scount, sdtype,
tmprecv, rcounts[rank], rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
- }
+ }
/* Determine neighbors, order in which blocks will arrive, etc. */
even_rank = !(rank % 2);
/* Communication loop:
- First step is special: exchange a single block with neighbor[0].
- - Rest of the steps:
- update recv_data_from according to offset, and
+ - Rest of the steps:
+ update recv_data_from according to offset, and
exchange two blocks with appropriate neighbor.
the send location becomes previous receve location.
Note, we need to create indexed datatype to send and receive these
*/
tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
tmpsend = (char*)rbuf + rdispls[rank] * rext;
- Request::sendrecv(tmpsend, rcounts[rank], rdtype,
+ Request::sendrecv(tmpsend, rcounts[rank], rdtype,
neighbor[0], COLL_TAG_ALLGATHERV,
- tmprecv, rcounts[neighbor[0]], rdtype,
+ tmprecv, rcounts[neighbor[0]], rdtype,
neighbor[0], COLL_TAG_ALLGATHERV,
comm, MPI_STATUS_IGNORE);
-
-
+
+
/* Determine initial sending counts and displacements*/
if (even_rank) {
send_data_from = rank;
MPI_Datatype new_rdtype, new_sdtype;
int new_scounts[2], new_sdispls[2], new_rcounts[2], new_rdispls[2];
const int i_parity = i % 2;
- recv_data_from[i_parity] =
+ recv_data_from[i_parity] =
(recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
/* Create new indexed types for sending and receiving.
We are receiving data from ranks (recv_data_from[i_parity]) and
(recv_data_from[i_parity]+1).
*/
-
+
new_scounts[0] = rcounts[send_data_from];
new_scounts[1] = rcounts[(send_data_from + 1)];
new_sdispls[0] = rdispls[send_data_from];
new_sdispls[1] = rdispls[(send_data_from + 1)];
- err = Datatype::create_indexed(2, new_scounts, new_sdispls, rdtype,
+ err = Datatype::create_indexed(2, new_scounts, new_sdispls, rdtype,
&new_sdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
new_sdtype->commit();
new_rcounts[1] = rcounts[(recv_data_from[i_parity] + 1)];
new_rdispls[0] = rdispls[recv_data_from[i_parity]];
new_rdispls[1] = rdispls[(recv_data_from[i_parity] + 1)];
- err = Datatype::create_indexed(2, new_rcounts, new_rdispls, rdtype,
+ err = Datatype::create_indexed(2, new_rcounts, new_rdispls, rdtype,
&new_rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
new_rdtype->commit();
-
+
tmprecv = (char*)rbuf;
tmpsend = (char*)rbuf;
-
+
/* Sendreceive */
Request::sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
COLL_TAG_ALLGATHERV,
comm, MPI_STATUS_IGNORE);
send_data_from = recv_data_from[i_parity];
-
+
Datatype::unref(new_sdtype);
Datatype::unref(new_rdtype);
}
for (i = 1; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
-
+
Request::sendrecv(sendptr, send_count, send_type, dst, tag,
recvptr + recv_disps[src] * extent, recv_counts[src], recv_type,
src, tag, comm, &status);
#include "../colls_private.h"
-/* IMPLEMENTED BY PITCH PATARASUK
- Non-topoloty-specific all-reduce operation designed bandwidth optimally
+/* IMPLEMENTED BY PITCH PATARASUK
+ Non-topoloty-specific all-reduce operation designed bandwidth optimally
Bug fixing by Xin Yuan, 04/04/2008
*/
if (rcount < size) {
XBT_WARN("MPI_allreduce_lr use default MPI_allreduce.");
Coll_allreduce_default::allreduce(sbuf, rbuf, rcount, dtype, op, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
- /* when communication size is not divisible by number of process:
+ /* when communication size is not divisible by number of process:
call the native implementation for the remain chunk at the end of the operation */
if (rcount % size != 0) {
remainder = rcount % size;
/* our ALL-REDUCE implementation
1. copy (partial of)send_buf to recv_buf
2. use logical ring reduce-scatter
- 3. use logical ring all-gather
+ 3. use logical ring all-gather
*/
// copy partial data
((rank + size - 1) % size), tag + i, comm, &status);
}
- /* when communication size is not divisible by number of process:
+ /* when communication size is not divisible by number of process:
call the native implementation for the remain chunk at the end of the operation */
if (remainder_flag) {
return Colls::allreduce((char *) sbuf + remainder_offset,
int count,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm);
-
-
+
+
namespace simgrid{
namespace smpi{
static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf,
return MPI_SUCCESS;
}
-
+
/* general two level allreduce helper function */
int Coll_allreduce_mvapich2_two_level::allreduce(void *sendbuf,
void *recvbuf,
MV2_Allreduce_intra_function = Coll_allreduce_mpich::allreduce;
if(MV2_Allreducection==NULL)
MV2_Allreducection = Coll_allreduce_rdb::allreduce;
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
if (count == 0) {
return MPI_SUCCESS;
}
/* Doing the shared memory gather and reduction by the leader */
if (local_rank == 0) {
- if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) ||
+ if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) ||
(MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_p2p_MV2) ) {
mpi_errno =
MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
}
} else {
/* insert the first reduce here */
- if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) ||
+ if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) ||
(MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_p2p_MV2) ) {
mpi_errno =
MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
* Accepts: Same as MPI_Allreduce(), segment size
* Returns: MPI_SUCCESS or error code
*
- * Description: Implements pipelined ring algorithm for allreduce:
+ * Description: Implements pipelined ring algorithm for allreduce:
* user supplies suggested segment size for the pipelining of
* reduce operation.
- * The segment size determines the number of phases, np, for
- * the algorithm execution.
- * The message is automatically divided into blocks of
+ * The segment size determines the number of phases, np, for
+ * the algorithm execution.
+ * The message is automatically divided into blocks of
* approximately (count / (np * segcount)) elements.
- * At the end of reduction phase, allgather like step is
+ * At the end of reduction phase, allgather like step is
* executed.
* Algorithm requires (np + 1)*(N - 1) steps.
*
- * Limitations: The algorithm DOES NOT preserve order of operations so it
+ * Limitations: The algorithm DOES NOT preserve order of operations so it
* can be used only for commutative operations.
- * In addition, algorithm cannot work if the total size is
+ * In addition, algorithm cannot work if the total size is
* less than size * segment size.
* Example on 3 nodes with 2 phases
* Initial state
- * # 0 1 2
+ * # 0 1 2
* [00a] [10a] [20a]
* [00b] [10b] [20b]
* [01a] [11a] [21a]
* [02b] [12b] [22b]
*
* COMPUTATION PHASE 0 (a)
- * Step 0: rank r sends block ra to rank (r+1) and receives bloc (r-1)a
+ * Step 0: rank r sends block ra to rank (r+1) and receives bloc (r-1)a
* from rank (r-1) [with wraparound].
- * # 0 1 2
+ * # 0 1 2
* [00a] [00a+10a] [20a]
* [00b] [10b] [20b]
* [01a] [11a] [11a+21a]
* [22a+02a] [12a] [22a]
* [02b] [12b] [22b]
*
- * Step 1: rank r sends block (r-1)a to rank (r+1) and receives bloc
+ * Step 1: rank r sends block (r-1)a to rank (r+1) and receives bloc
* (r-2)a from rank (r-1) [with wraparound].
- * # 0 1 2
+ * # 0 1 2
* [00a] [00a+10a] [00a+10a+20a]
* [00b] [10b] [20b]
* [11a+21a+01a] [11a] [11a+21a]
* [01b] [11b] [21b]
* [22a+02a] [22a+02a+12a] [22a]
- * [02b] [12b] [22b]
+ * [02b] [12b] [22b]
*
* COMPUTATION PHASE 1 (b)
- * Step 0: rank r sends block rb to rank (r+1) and receives bloc (r-1)b
+ * Step 0: rank r sends block rb to rank (r+1) and receives bloc (r-1)b
* from rank (r-1) [with wraparound].
- * # 0 1 2
+ * # 0 1 2
* [00a] [00a+10a] [20a]
* [00b] [00b+10b] [20b]
* [01a] [11a] [11a+21a]
* [22a+02a] [12a] [22a]
* [22b+02b] [12b] [22b]
*
- * Step 1: rank r sends block (r-1)b to rank (r+1) and receives bloc
+ * Step 1: rank r sends block (r-1)b to rank (r+1) and receives bloc
* (r-2)b from rank (r-1) [with wraparound].
- * # 0 1 2
+ * # 0 1 2
* [00a] [00a+10a] [00a+10a+20a]
* [00b] [10b] [0bb+10b+20b]
* [11a+21a+01a] [11a] [11a+21a]
* [11b+21b+01b] [11b] [21b]
* [22a+02a] [22a+02a+12a] [22a]
- * [02b] [22b+01b+12b] [22b]
+ * [02b] [22b+01b+12b] [22b]
+ *
*
- *
* DISTRIBUTION PHASE: ring ALLGATHER with ranks shifted by 1 (same as
* in regular ring algorithm.
*
*/
-
+
#define COLL_TUNED_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
if( ((SEGSIZE) >= (TYPELNG)) && \
((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
if( residual > ((TYPELNG) >> 1) ) \
(SEGCOUNT)++; \
} \
-
+
#define COLL_TUNED_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
namespace simgrid{
namespace smpi{
-int
+int
Coll_allreduce_ompi_ring_segmented::allreduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype,
MPI_Op op,
- MPI_Comm comm)
+ MPI_Comm comm)
{
int ret = MPI_SUCCESS;
int line;
int k, recv_from, send_to;
- int early_blockcount, late_blockcount, split_rank;
+ int early_blockcount, late_blockcount, split_rank;
int segcount, max_segcount;
int num_phases, phase;
int block_count;
}
return MPI_SUCCESS;
}
-
+
/* Determine segment count based on the suggested segment size */
extent = dtype->get_extent();
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Special case for count less than size * segcount - use regular ring */
if (count < size * segcount) {
XBT_DEBUG( "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count);
- return (Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op,
+ return (Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op,
comm));
}
/* Determine the number of phases of the algorithm */
num_phases = count / (size * segcount);
- if ((count % (size * segcount) >= size) &&
+ if ((count % (size * segcount) >= size) &&
(count % (size * segcount) > ((size * segcount) / 2))) {
num_phases++;
}
- /* Determine the number of elements per block and corresponding
+ /* Determine the number of elements per block and corresponding
block sizes.
The blocks are divided into "early" and "late" ones:
- blocks 0 .. (split_rank - 1) are "early" and
+ blocks 0 .. (split_rank - 1) are "early" and
blocks (split_rank) .. (size - 1) are "late".
Early blocks are at most 1 element larger than the late ones.
Note, these blocks will be split into num_phases segments,
out of the largest one will have max_segcount elements.
*/
- COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
+ COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_blockcount, late_blockcount )
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
max_segcount, k)
ptrdiff_t phase_offset;
int early_phase_segcount, late_phase_segcount, split_phase, phase_count;
- /*
+ /*
For each of the remote nodes:
- post irecv for block (r-1)
- send block (r)
*/
send_to = (rank + 1) % size;
recv_from = (rank + size - 1) % size;
-
+
inbi = 0;
/* Initialize first receive from the neighbor on the left */
reqs[inbi] = Request::irecv(inbuf[inbi], max_segcount, dtype, recv_from,
/* Send first block (my block) to the neighbor on the right:
- compute my block and phase offset
- send data */
- block_offset = ((rank < split_rank)?
- (rank * early_blockcount) :
+ block_offset = ((rank < split_rank)?
+ (rank * early_blockcount) :
(rank * late_blockcount + split_rank));
block_count = ((rank < split_rank)? early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
- (phase * early_phase_segcount) :
+ (phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmpsend = ((char*)rbuf) + (block_offset + phase_offset) * extent;
Request::send(tmpsend, phase_count, dtype, send_to,
666, comm);
-
+
for (k = 2; k < size; k++) {
const int prevblock = (rank + size - k + 1) % size;
-
+
inbi = inbi ^ 0x1;
-
+
/* Post irecv for the current block */
reqs[inbi] = Request::irecv(inbuf[inbi], max_segcount, dtype, recv_from,
666, comm);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
-
+
/* Wait on previous block to arrive */
Request::wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE);
-
+
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
block_offset = ((prevblock < split_rank)?
(prevblock * early_blockcount) :
(prevblock * late_blockcount + split_rank));
- block_count = ((prevblock < split_rank)?
+ block_count = ((prevblock < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
- (phase * early_phase_segcount) :
+ (phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
if(op!=MPI_OP_NULL) op->apply( inbuf[inbi ^ 0x1], tmprecv, &phase_count, dtype);
Request::send(tmprecv, phase_count, dtype, send_to,
666, comm);
}
-
+
/* Wait on the last block to arrive */
Request::wait(&reqs[inbi], MPI_STATUS_IGNORE);
-
- /* Apply operation on the last block (from neighbor (rank + 1)
+
+ /* Apply operation on the last block (from neighbor (rank + 1)
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
recv_from = (rank + 1) % size;
block_offset = ((recv_from < split_rank)?
(recv_from * early_blockcount) :
(recv_from * late_blockcount + split_rank));
- block_count = ((recv_from < split_rank)?
+ block_count = ((recv_from < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
- (phase * early_phase_segcount) :
+ (phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
if(op!=MPI_OP_NULL) op->apply( inbuf[inbi], tmprecv, &phase_count, dtype);
for (k = 0; k < size - 1; k++) {
const int recv_data_from = (rank + size - k) % size;
const int send_data_from = (rank + 1 + size - k) % size;
- const int send_block_offset =
+ const int send_block_offset =
((send_data_from < split_rank)?
(send_data_from * early_blockcount) :
(send_data_from * late_blockcount + split_rank));
- const int recv_block_offset =
+ const int recv_block_offset =
((recv_data_from < split_rank)?
(recv_data_from * early_blockcount) :
(recv_data_from * late_blockcount + split_rank));
- block_count = ((send_data_from < split_rank)?
+ block_count = ((send_data_from < split_rank)?
early_blockcount : late_blockcount);
tmprecv = (char*)rbuf + recv_block_offset * extent;
// processes of rank < 2*rem send their data to
// (rank+1). These even-numbered processes no longer
// participate in the algorithm until the very end. The
- // remaining processes form a nice power-of-two.
+ // remaining processes form a nice power-of-two.
if (rank < 2 * rem) {
- // even
+ // even
if (rank % 2 == 0) {
Request::send(rbuff, count, dtype, rank + 1, tag, comm);
// the operation is commutative or not.
if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuff, &count, dtype);
- // change the rank
+ // change the rank
newrank = rank / 2;
}
}
- else // rank >= 2 * rem
+ else // rank >= 2 * rem
newrank = rank - rem;
// If op is user-defined or count is less than pof2, use
// datatypes on one process and derived on another as long as
// the type maps are the same. Breaking up derived
// datatypes to do the reduce-scatter is tricky, therefore
- // using recursive doubling in that case.)
+ // using recursive doubling in that case.)
if (newrank != -1) {
// do a reduce-scatter followed by allgather. for the
// reduce-scatter, calculate the count that each process receives
- // and the displacement within the buffer
+ // and the displacement within the buffer
cnts = (int *) xbt_malloc(pof2 * sizeof(int));
disps = (int *) xbt_malloc(pof2 * sizeof(int));
last_idx = pof2;
while (mask < pof2) {
newdst = newrank ^ mask;
- // find real rank of dest
+ // find real rank of dest
dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
send_cnt = recv_cnt = 0;
recv_cnt += cnts[i];
}
- // Send data from recvbuf. Recv into tmp_buf
+ // Send data from recvbuf. Recv into tmp_buf
Request::sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
dtype, dst, tag,
(char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
dtype, dst, tag, comm, &status);
// tmp_buf contains data received in this step.
- // recvbuf contains data accumulated so far
+ // recvbuf contains data accumulated so far
// This algorithm is used only for predefined ops
// and predefined ops are always commutative.
if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
(char *) rbuff + disps[recv_idx] * extent, &recv_cnt, dtype);
- // update send_idx for next iteration
+ // update send_idx for next iteration
send_idx = recv_idx;
mask <<= 1;
// update last_idx, but not in last iteration because the value
- // is needed in the allgather step below.
+ // is needed in the allgather step below.
if (mask < pof2)
last_idx = recv_idx + pof2 / mask;
}
- // now do the allgather
+ // now do the allgather
mask >>= 1;
while (mask > 0) {
send_cnt = recv_cnt = 0;
if (newrank < newdst) {
- // update last_idx except on first iteration
+ // update last_idx except on first iteration
if (mask != pof2 / 2)
last_idx = last_idx + pof2 / (mask * 2);
// participate above.
if (rank < 2 * rem) {
- if (rank % 2) // odd
+ if (rank % 2) // odd
Request::send(rbuff, count, dtype, rank - 1, tag, comm);
- else // even
+ else // even
Request::recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
}
if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt,
dtype);
- // update send_idx for next iteration
+ // update send_idx for next iteration
send_idx = recv_idx;
mask >>= 1;
}
if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt,
dtype);
- // update send_idx for next iteration
+ // update send_idx for next iteration
send_idx = recv_idx;
mask >>= 1;
}
// processes of rank < 2*rem send their data to
// (rank+1). These even-numbered processes no longer
// participate in the algorithm until the very end. The
- // remaining processes form a nice power-of-two.
+ // remaining processes form a nice power-of-two.
if (rank < 2 * rem) {
- // even
+ // even
if (rank % 2 == 0) {
Request::send(rbuff, count, dtype, rank + 1, tag, comm);
// the operation is commutative or not.
if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuff, &count, dtype);
- // change the rank
+ // change the rank
newrank = rank / 2;
}
}
- else // rank >= 2 * rem
+ else // rank >= 2 * rem
newrank = rank - rem;
// If op is user-defined or count is less than pof2, use
// datatypes on one process and derived on another as long as
// the type maps are the same. Breaking up derived
// datatypes to do the reduce-scatter is tricky, therefore
- // using recursive doubling in that case.)
+ // using recursive doubling in that case.)
if (newrank != -1) {
mask = 0x1;
while (mask < pof2) {
newdst = newrank ^ mask;
- // find real rank of dest
+ // find real rank of dest
dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
// Send the most current data, which is in recvbuf. Recv
- // into tmp_buf
+ // into tmp_buf
Request::sendrecv(rbuff, count, dtype, dst, tag, tmp_buf, count, dtype,
dst, tag, comm, &status);
// tmp_buf contains data received in this step.
- // recvbuf contains data accumulated so far
+ // recvbuf contains data accumulated so far
// op is commutative OR the order is already right
// we assume it is commuttive op
// participate above.
if (rank < 2 * rem) {
- if (rank % 2) // odd
+ if (rank % 2) // odd
Request::send(rbuff, count, dtype, rank - 1, tag, comm);
- else // even
+ else // even
Request::recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
}
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../colls_private.h"
-/* IMPLEMENTED BY PITCH PATARASUK
- Non-topoloty-specific (however, number of cores/node need to be changed)
+/* IMPLEMENTED BY PITCH PATARASUK
+ Non-topoloty-specific (however, number of cores/node need to be changed)
all-reduce operation designed for smp clusters
- It uses 2-layer communication: binomial for both intra-communication
+ It uses 2-layer communication: binomial for both intra-communication
inter-communication
The communication are done in a pipeline fashion */
-/* this is a default segment size for pipelining,
+/* this is a default segment size for pipelining,
but it is typically passed as a command line argument */
int allreduce_smp_binomial_pipeline_segment_size = 4096;
}
}
- /* start binomial reduce inter-communication between each SMP nodes:
+ /* start binomial reduce inter-communication between each SMP nodes:
each node only have one process that can communicate to other nodes */
if ((phase > 0) && (phase < (pipelength + 1))) {
if (intra_rank == 0) {
}
}
- /* start binomial broadcast inter-communication between each SMP nodes:
+ /* start binomial broadcast inter-communication between each SMP nodes:
each node only have one process that can communicate to other nodes */
if ((phase > 1) && (phase < (pipelength + 2))) {
if (intra_rank == 0) {
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../colls_private.h"
-/* IMPLEMENTED BY PITCH PATARASUK
- Non-topoloty-specific (however, number of cores/node need to be changed)
+/* IMPLEMENTED BY PITCH PATARASUK
+ Non-topoloty-specific (however, number of cores/node need to be changed)
all-reduce operation designed for smp clusters
- It uses 2-layer communication: binomial for both intra-communication
+ It uses 2-layer communication: binomial for both intra-communication
inter-communication*/
mask <<= 1;
}
- /* start binomial reduce inter-communication between each SMP nodes:
+ /* start binomial reduce inter-communication between each SMP nodes:
each node only have one process that can communicate to other nodes */
if (intra_rank == 0) {
mask = 1;
}
}
- /* start binomial broadcast inter-communication between each SMP nodes:
+ /* start binomial broadcast inter-communication between each SMP nodes:
each node only have one process that can communicate to other nodes */
if (intra_rank == 0) {
mask = 1;
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../colls_private.h"
-/* IMPLEMENTED BY PITCH PATARASUK
- Non-topoloty-specific (however, number of cores/node need to be changed)
+/* IMPLEMENTED BY PITCH PATARASUK
+ Non-topoloty-specific (however, number of cores/node need to be changed)
all-reduce operation designed for smp clusters
- It uses 2-layer communication: binomial for intra-communication
+ It uses 2-layer communication: binomial for intra-communication
and rdb for inter-communication*/
} /* end binomial reduce intra-communication */
- /* start rdb (recursive doubling) all-reduce inter-communication
+ /* start rdb (recursive doubling) all-reduce inter-communication
between each SMP nodes : each node only have one process that can communicate
to other nodes */
if (intra_rank == 0) {
newrank = inter_rank - rem;
}
- /* example inter-communication RDB rank change algorithm
+ /* example inter-communication RDB rank change algorithm
0,4,8,12..36 <= true rank (assume 4 core per SMP)
0123 4567 89 <= inter_rank
1 3 4567 89 (1,3 got data from 0,2 : 0,2 will be idle until the end)
- 0 1 4567 89
+ 0 1 4567 89
0 1 2345 67 => newrank
*/
}
}
- /* non pof2 case
- left-over processes (all even ranks: < 2 * rem) get the result
+ /* non pof2 case
+ left-over processes (all even ranks: < 2 * rem) get the result
*/
if (inter_rank < 2 * rem) {
if (inter_rank % 2) {
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
-/*
+/*
* implemented by Pitch Patarasuk, 07/01/2007
*/
#include "../colls_private.h"
algorithm. It actually performs allgather operation in x dimension
then in the y dimension. Each node then extracts the needed data.
The communication in each dimension follows "simple."
-
+
* Auther: Ahmad Faraj
****************************************************************************/
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../colls_private.h"
-
+
/*Naive and simple basic alltoall implementation. */
* copyright file COPYRIGHT in the top level MVAPICH2 directory.
*
*/
-
+
//correct on stampede
#define MV2_ALLTOALL_THROTTLE_FACTOR 4
-
+
#include "../colls_private.h"
namespace simgrid{
namespace smpi{
int dst, rank;
MPI_Request *reqarray;
MPI_Status *starray;
-
+
if (recvcount == 0) return MPI_SUCCESS;
-
+
comm_size = comm->size();
rank = comm->rank();
-
+
/* Get extent of send and recv types */
recvtype_extent = recvtype->get_extent();
sendtype_extent = sendtype->get_extent();
-
+
/* Medium-size message. Use isend/irecv with scattered
destinations. Use Tony Ladd's modification to post only
a small number of isends/irecvs at a time. */
there are only a few isend/irecvs left)
*/
int ii, ss, bblock;
-
+
//Stampede is configured with
bblock = MV2_ALLTOALL_THROTTLE_FACTOR;//mv2_coll_param.alltoall_throttle_factor;
-
+
if (bblock >= comm_size) bblock = comm_size;
/* If throttle_factor is n, each process posts n pairs of isend/irecv
in each iteration. */
-
+
/* FIXME: This should use the memory macros (there are storage
leaks here if there is an error, for example) */
reqarray= (MPI_Request*)xbt_malloc(2*bblock*sizeof(MPI_Request));
-
+
starray=(MPI_Status *)xbt_malloc(2*bblock*sizeof(MPI_Status));
-
+
for (ii=0; ii<comm_size; ii+=bblock) {
ss = comm_size-ii < bblock ? comm_size-ii : bblock;
/* do the communication -- post ss sends and receives: */
COLL_TAG_ALLTOALL, comm);
}
-
+
/* ... then wait for them to finish: */
Request::waitall(2*ss,reqarray,starray);
-
-
+
+
/* --BEGIN ERROR HANDLING-- */
if (mpi_errno == MPI_ERR_IN_STATUS) {
for (j=0; j<2*ss; j++) {
xbt_free(starray);
xbt_free(reqarray);
return (mpi_errno);
-
+
}
}
}
* Return: int
- * inputs:
+ * inputs:
send_buff: send input buffer
send_count: number of elements to send
send_type: data type of elements being sent
* Descrp: Function realizes the allgather operation using the recursive
doubling algorithm.
- * Auther: MPICH / slightly modified by Ahmad Faraj.
+ * Auther: MPICH / slightly modified by Ahmad Faraj.
****************************************************************************/
namespace simgrid{
* under the terms of the license (GNU LGPL) which comes with this package. */
#include "../colls_private.h"
-/*
+/*
* Linear functions are copied from the basic coll module. For
* some small number of nodes and/or small data sizes they are just as
* fast as tuned/tree based segmenting operations and as such may be
* selected by the decision functions. These are copied into this module
* due to the way we select modules in V1. i.e. in V2 we will handle this
- * differently and so will not have to duplicate code.
- * GEF Oct05 after asking Jeff.
+ * differently and so will not have to duplicate code.
+ * GEF Oct05 after asking Jeff.
*/
namespace simgrid{
namespace smpi{
);
preq++;
++nreqs;
-
+
}
/* Now post all sends */
recv_chunk = recv_type->get_extent();
int pof2 = ((num_procs != 0) && ((num_procs & (~num_procs + 1)) == num_procs));
for (i = 0; i < num_procs; i++) {
-
+
if (pof2 == 1) {
/* use exclusive-or algorithm */
src = dst = rank ^ i;
*
* Additional copyrights may follow
*/
-
+
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
/* N2_prev = greatest power of two < size of Comm */
for( N2_prev = 1; N2_prev <= size; N2_prev <<= 1 );
N2_prev >>= 1;
-
+
int surfeit = size - N2_prev;
/* Perform a combine-like operation */
/*
- * Barrier is ment to be a synchronous operation, as some BTLs can mark
- * a request done before its passed to the NIC and progress might not be made
- * elsewhere we cannot allow a process to exit the barrier until its last
+ * Barrier is ment to be a synchronous operation, as some BTLs can mark
+ * a request done before its passed to the NIC and progress might not be made
+ * elsewhere we cannot allow a process to exit the barrier until its last
* [round of] sends are completed.
*
- * It is last round of sends rather than 'last' individual send as each pair of
- * peers can use different channels/devices/btls and the receiver of one of
+ * It is last round of sends rather than 'last' individual send as each pair of
+ * peers can use different channels/devices/btls and the receiver of one of
* these sends might be forced to wait as the sender
- * leaves the collective and does not make progress until the next mpi call
+ * leaves the collective and does not make progress until the next mpi call
*
*/
right = ((rank+1)%size);
if (rank > 0) { /* receive message from the left */
- Request::recv((void*)NULL, 0, MPI_BYTE, left,
+ Request::recv((void*)NULL, 0, MPI_BYTE, left,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
}
/* Send message to the right */
- Request::send((void*)NULL, 0, MPI_BYTE, right,
+ Request::send((void*)NULL, 0, MPI_BYTE, right,
COLL_TAG_BARRIER,
comm);
/* root needs to receive from the last node */
if (rank == 0) {
- Request::recv((void*)NULL, 0, MPI_BYTE, left,
+ Request::recv((void*)NULL, 0, MPI_BYTE, left,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
}
/* Allow nodes to exit */
if (rank > 0) { /* post Receive from left */
- Request::recv((void*)NULL, 0, MPI_BYTE, left,
+ Request::recv((void*)NULL, 0, MPI_BYTE, left,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
}
/* send message to the right one */
- Request::send((void*)NULL, 0, MPI_BYTE, right,
+ Request::send((void*)NULL, 0, MPI_BYTE, right,
COLL_TAG_BARRIER,
comm);
-
+
/* rank 0 post receive from the last node */
if (rank == 0) {
- Request::recv((void*)NULL, 0, MPI_BYTE, left,
+ Request::recv((void*)NULL, 0, MPI_BYTE, left,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
}
rank = comm->rank();
size = comm->size();
XBT_DEBUG(
- "ompi_coll_tuned_barrier_ompi_recursivedoubling rank %d",
+ "ompi_coll_tuned_barrier_ompi_recursivedoubling rank %d",
rank);
/* do nearest power of 2 less than size calc */
if (rank < (size - adjsize)) {
/* send enter message to higher ranked node */
remote = rank + adjsize;
- Request::send((void*)NULL, 0, MPI_BYTE, remote,
+ Request::send((void*)NULL, 0, MPI_BYTE, remote,
COLL_TAG_BARRIER,
comm);
"ompi_coll_tuned_barrier_ompi_bruck rank %d", rank);
/* exchange data with rank-2^k and rank+2^k */
- for (distance = 1; distance < size; distance <<= 1) {
+ for (distance = 1; distance < size; distance <<= 1) {
from = (rank + size - distance) % size;
to = (rank + distance) % size;
/* send message to lower ranked node */
- Request::sendrecv(NULL, 0, MPI_BYTE, to,
+ Request::sendrecv(NULL, 0, MPI_BYTE, to,
COLL_TAG_BARRIER,
- NULL, 0, MPI_BYTE, from,
+ NULL, 0, MPI_BYTE, from,
COLL_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
}
"ompi_coll_tuned_barrier_ompi_two_procs rank %d", remote);
remote = (remote + 1) & 0x1;
- Request::sendrecv(NULL, 0, MPI_BYTE, remote,
+ Request::sendrecv(NULL, 0, MPI_BYTE, remote,
COLL_TAG_BARRIER,
- NULL, 0, MPI_BYTE, remote,
+ NULL, 0, MPI_BYTE, remote,
COLL_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
return (MPI_SUCCESS);
/* All non-root send & receive zero-length message. */
if (rank > 0) {
- Request::send (NULL, 0, MPI_BYTE, 0,
+ Request::send (NULL, 0, MPI_BYTE, 0,
COLL_TAG_BARRIER,
comm);
- Request::recv (NULL, 0, MPI_BYTE, 0,
+ Request::recv (NULL, 0, MPI_BYTE, 0,
COLL_TAG_BARRIER,
comm, MPI_STATUS_IGNORE);
}
/*
* Another recursive doubling type algorithm, but in this case
- * we go up the tree and back down the tree.
+ * we go up the tree and back down the tree.
*/
int Coll_barrier_ompi_tree::barrier(MPI_Comm comm)
{
rank = comm->rank();
size = comm->size();
XBT_DEBUG(
- "ompi_coll_tuned_barrier_ompi_tree %d",
+ "ompi_coll_tuned_barrier_ompi_tree %d",
rank);
/* Find the nearest power of 2 of the communicator size. */
partner = rank ^ jump;
if (!(partner & (jump-1)) && partner < size) {
if (partner > rank) {
- Request::recv (NULL, 0, MPI_BYTE, partner,
+ Request::recv (NULL, 0, MPI_BYTE, partner,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
} else if (partner < rank) {
}
}
}
-
+
depth>>=1;
for (jump = depth; jump>0; jump>>=1) {
partner = rank ^ jump;
COLL_TAG_BARRIER,
comm);
} else if (partner < rank) {
- Request::recv (NULL, 0, MPI_BYTE, partner,
+ Request::recv (NULL, 0, MPI_BYTE, partner,
COLL_TAG_BARRIER, comm,
MPI_STATUS_IGNORE);
}
/* segment is segment size in number of elements (not bytes) */
int segment = bcast_NTSB_segment_size_in_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
/* pipeline length */
int pipe_length = count / segment;
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
static int bcast_NTSL_segment_size_in_byte = 8192;
-/* Non-topology-specific pipelined linear-bcast function
+/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
namespace simgrid{
/* segment is segment size in number of elements (not bytes) */
int segment = bcast_NTSL_segment_size_in_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
/* pipeline length */
int pipe_length = count / segment;
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
static int bcast_NTSL_segment_size_in_byte = 8192;
-/* Non-topology-specific pipelined linear-bcast function
+/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
namespace simgrid{
/* segment is segment size in number of elements (not bytes) */
int segment = extent == 0 ? 1 : (bcast_NTSL_segment_size_in_byte / extent);
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
/* pipeline length */
int pipe_length = count / segment;
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
else if (rank == 0)
Request::recv(buf, count, datatype, root, tag, comm, &status);
}
- // when a message is smaller than a block size => no pipeline
+ // when a message is smaller than a block size => no pipeline
if (count <= segment) {
// case ROOT-of-each-SMP
if (rank % host_num_core == 0) {
}
int segment = bcast_SMP_linear_segment_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
int pipe_length = count / segment;
int remainder = count % segment;
int increment = segment * extent;
if (size <= num_core) {
XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
Coll_bcast_default::bcast(buf, count, datatype, root, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
// if root is not zero send to rank zero first
if (root != 0) {
else if (rank == 0)
Request::recv(buf, count, datatype, root, tag, comm, &status);
}
- // when a message is smaller than a block size => no pipeline
+ // when a message is smaller than a block size => no pipeline
if (count <= segment) {
// case ROOT
if (rank == 0) {
/* segment is segment size in number of elements (not bytes) */
int segment = bcast_arrival_pattern_aware_wait_segment_size_in_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
/* pipeline length */
int pipe_length = count / segment;
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
/* segment is segment size in number of elements (not bytes) */
int segment = bcast_NTSL_segment_size_in_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
/* pipeline length */
int pipe_length = count / segment;
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
}else{
Request::waitall(pipe_length, recv_request_array, recv_status_array);
}
-
+
}
free(send_request_array);
if (count < size) {
XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast.");
Colls::bcast(buf, count, datatype, root, comm);
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
0, tag, comm, &status);
/* at this point all nodes in this set perform all-gather operation */
-
+
to = (myordering == (total_nodes - 1)) ? header_buf[0] : header_buf[myordering + 1];
from = (myordering == 0) ? header_buf[total_nodes - 1] : header_buf[myordering - 1];
extent = data_type->get_extent();
int segment = flattree_segment_in_byte / extent;
- segment = segment == 0 ? 1 :segment;
+ segment = segment == 0 ? 1 :segment;
int pipe_length = count / segment;
int increment = segment * extent;
if (pipe_length==0) {
extern int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype datatype,
int root, MPI_Comm comm_ptr);
-
+
extern int zcpy_knomial_factor;
extern int mv2_pipelined_zcpy_knomial_factor;
extern int bcast_segment_size;
if (MV2_Bcast_function==NULL){
MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
-
+
if (MV2_Bcast_intra_node_function==NULL){
MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
shmem_comm = comm->get_intra_comm();
local_rank = shmem_comm->rank();
local_size = shmem_comm->size();
int* leaders_map = comm->get_leaders_map();
leader_of_root = comm->group()->rank(leaders_map[root]);
leader_root = leader_comm->group()->rank(leaders_map[root]);
-
-
+
+
if (local_size > 1) {
if ((local_rank == 0) && (root != rank) && (leader_root == global_rank)) {
Request::recv(buffer, count, datatype, root,
if (MV2_Bcast_function==NULL){
MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
-
+
if (MV2_Bcast_intra_node_function==NULL){
MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
local_size = comm->size();
rank = comm->rank();
int mpi_errno = MPI_SUCCESS;
int comm_size;
int two_level_bcast = 1;
- size_t nbytes = 0;
+ size_t nbytes = 0;
int is_homogeneous, is_contig;
MPI_Aint type_size;
void *tmp_buf = NULL;
if (MV2_Bcast_function==NULL){
MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
-
+
if (MV2_Bcast_intra_node_function==NULL){
MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
comm_size = comm->size();
// rank = comm->rank();
/*
namespace simgrid{
namespace smpi{
int Coll_bcast_ompi_pipeline::bcast( void* buffer,
- int original_count,
- MPI_Datatype datatype,
+ int original_count,
+ MPI_Datatype datatype,
int root,
MPI_Comm comm)
{
size_t segsize =1024 << 7;
//mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
//mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
-
+
// return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
// count_by_segment, data->cached_pipeline );
ompi_coll_tree_t * tree = ompi_coll_tuned_topo_build_chain( 1, comm, root );
int rank, size;
int segindex;
int num_segments; /* Number of segments */
- int sendcount; /* number of elements sent in this segment */
+ int sendcount; /* number of elements sent in this segment */
size_t realsegsize;
char *tmpbuf;
ptrdiff_t extent;
MPI_Request recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
MPI_Request *send_reqs = NULL;
int req_index;
-
+
/**
* Determine number of elements sent per operation.
*/
const double a_p16 = 3.2118e-6; /* [1 / byte] */
- const double b_p16 = 8.7936;
+ const double b_p16 = 8.7936;
const double a_p64 = 2.3679e-6; /* [1 / byte] */
- const double b_p64 = 1.1787;
+ const double b_p64 = 1.1787;
const double a_p128 = 1.6134e-6; /* [1 / byte] */
const double b_p128 = 2.1102;
size_t message_size;
message_size = type_size * (unsigned long)original_count; /* needed for decision */
if (size < (a_p128 * message_size + b_p128)) {
- //Pipeline with 128KB segments
+ //Pipeline with 128KB segments
segsize = 1024 << 7;
}else if (size < (a_p64 * message_size + b_p64)) {
- // Pipeline with 64KB segments
+ // Pipeline with 64KB segments
segsize = 1024 << 6;
}else if (size < (a_p16 * message_size + b_p16)) {
- //Pipeline with 16KB segments
+ //Pipeline with 16KB segments
segsize = 1024 << 4;
}
extent = datatype->get_extent();
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
realsegsize = count_by_segment * extent;
-
+
/* Set the buffer pointers */
tmpbuf = (char *) buffer;
/* Root code */
if( rank == root ) {
- /*
+ /*
For each segment:
- send segment to all children.
The last segment may have less elements than other segments.
if( segindex == (num_segments - 1) ) {
sendcount = original_count - segindex * count_by_segment;
}
- for( i = 0; i < tree->tree_nextsize; i++ ) {
+ for( i = 0; i < tree->tree_nextsize; i++ ) {
send_reqs[i] = Request::isend(tmpbuf, sendcount, datatype,
- tree->tree_next[i],
+ tree->tree_next[i],
COLL_TAG_BCAST, comm);
- }
+ }
/* complete the sends before starting the next sends */
- Request::waitall( tree->tree_nextsize, send_reqs,
+ Request::waitall( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
/* update tmp buffer */
tmpbuf += realsegsize;
}
- }
-
+ }
+
/* Intermediate nodes code */
- else if( tree->tree_nextsize > 0 ) {
- /*
- Create the pipeline.
+ else if( tree->tree_nextsize > 0 ) {
+ /*
+ Create the pipeline.
1) Post the first receive
2) For segments 1 .. num_segments
- post new receive
recv_reqs[req_index]=Request::irecv(tmpbuf, count_by_segment, datatype,
tree->tree_prev, COLL_TAG_BCAST,
comm);
-
+
for( segindex = 1; segindex < num_segments; segindex++ ) {
-
+
req_index = req_index ^ 0x1;
-
+
/* post new irecv */
recv_reqs[req_index]= Request::irecv( tmpbuf + realsegsize, count_by_segment,
- datatype, tree->tree_prev,
+ datatype, tree->tree_prev,
COLL_TAG_BCAST,
comm);
-
+
/* wait for and forward the previous segment to children */
- Request::wait( &recv_reqs[req_index ^ 0x1],
+ Request::wait( &recv_reqs[req_index ^ 0x1],
MPI_STATUSES_IGNORE );
-
- for( i = 0; i < tree->tree_nextsize; i++ ) {
+
+ for( i = 0; i < tree->tree_nextsize; i++ ) {
send_reqs[i]=Request::isend(tmpbuf, count_by_segment, datatype,
- tree->tree_next[i],
+ tree->tree_next[i],
COLL_TAG_BCAST, comm );
- }
-
+ }
+
/* complete the sends before starting the next iteration */
- Request::waitall( tree->tree_nextsize, send_reqs,
+ Request::waitall( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
-
+
/* Update the receive buffer */
tmpbuf += realsegsize;
}
sendcount = original_count - (num_segments - 1) * count_by_segment;
for( i = 0; i < tree->tree_nextsize; i++ ) {
send_reqs[i] = Request::isend(tmpbuf, sendcount, datatype,
- tree->tree_next[i],
+ tree->tree_next[i],
COLL_TAG_BCAST, comm);
}
-
- Request::waitall( tree->tree_nextsize, send_reqs,
+
+ Request::waitall( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
}
-
+
/* Leaf nodes */
else {
- /*
+ /*
Receive all segments from parent in a loop:
1) post irecv for the first segment
2) for segments 1 .. num_segments
req_index = req_index ^ 0x1;
tmpbuf += realsegsize;
/* post receive for the next segment */
- recv_reqs[req_index] = Request::irecv(tmpbuf, count_by_segment, datatype,
+ recv_reqs[req_index] = Request::irecv(tmpbuf, count_by_segment, datatype,
tree->tree_prev, COLL_TAG_BCAST,
comm);
/* wait on the previous segment */
- Request::wait( &recv_reqs[req_index ^ 0x1],
+ Request::wait( &recv_reqs[req_index ^ 0x1],
MPI_STATUS_IGNORE );
}
#define MAXTREEFANOUT 32
namespace simgrid{
namespace smpi{
-
+
int
Coll_bcast_ompi_split_bintree::bcast ( void* buffer,
- int count,
- MPI_Datatype datatype,
+ int count,
+ MPI_Datatype datatype,
int root,
MPI_Comm comm)
{
int segcount[2]; /* Number ompi_request_wait_allof elements sent with each segment */
uint32_t counts[2];
int num_segments[2]; /* Number of segmenets */
- int sendcount[2]; /* the same like segcount, except for the last segment */
+ int sendcount[2]; /* the same like segcount, except for the last segment */
size_t realsegsize[2];
char *tmpbuf[2];
size_t type_size;
ptrdiff_t type_extent;
-
-
+
+
MPI_Request base_req, new_req;
ompi_coll_tree_t *tree;
// mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
//compute again segsize
const size_t intermediate_message_size = 370728;
size_t message_size = datatype->size() * (unsigned long)count;
- if(message_size < intermediate_message_size)
+ if(message_size < intermediate_message_size)
segsize = 1024 ;
else
segsize = 1024 << 3;
-
+
XBT_DEBUG("ompi_coll_tuned_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize);
if (size == 1) {
if (segsize < ((uint32_t)type_size)) {
segsize = type_size; /* push segsize up to hold one type */
}
- segcount[0] = segcount[1] = segsize / type_size;
+ segcount[0] = segcount[1] = segsize / type_size;
num_segments[0] = counts[0]/segcount[0];
if ((counts[0] % segcount[0]) != 0) num_segments[0]++;
num_segments[1] = counts[1]/segcount[1];
(segsize > counts[0] * type_size) ||
(segsize > counts[1] * type_size) ) {
/* call linear version here ! */
- return (Coll_bcast_SMP_linear::bcast ( buffer, count, datatype,
+ return (Coll_bcast_SMP_linear::bcast ( buffer, count, datatype,
root, comm));
}
type_extent = datatype->get_extent();
-
+
/* Determine real segment size */
realsegsize[0] = segcount[0] * type_extent;
realsegsize[1] = segcount[1] * type_extent;
-
+
/* set the buffer pointers */
tmpbuf[0] = (char *) buffer;
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
/* determine if I am left (0) or right (1), (root is right) */
lr = ((rank + size - root)%size + 1)%2;
-
+
/* root code */
if( rank == root ) {
/* determine segment count */
- sendcount[0] = segcount[0];
+ sendcount[0] = segcount[0];
sendcount[1] = segcount[1];
/* for each segment */
for (segindex = 0; segindex < num_segments[0]; segindex++) {
continue;
}
/* determine how many elements are being sent in this round */
- if(segindex == (num_segments[i] - 1))
+ if(segindex == (num_segments[i] - 1))
sendcount[i] = counts[i] - segindex*segcount[i];
/* send data */
Request::send(tmpbuf[i], sendcount[i], datatype,
tmpbuf[i] += realsegsize[i];
}
}
- }
-
+ }
+
/* intermediate nodes code */
- else if( tree->tree_nextsize > 0 ) {
+ else if( tree->tree_nextsize > 0 ) {
/* Intermediate nodes:
* It will receive segments only from one half of the data.
- * Which one is determined by whether the node belongs to the "left" or "right"
+ * Which one is determined by whether the node belongs to the "left" or "right"
* subtree. Topoloby building function builds binary tree such that
* odd "shifted ranks" ((rank + size - root)%size) are on the left subtree,
* and even on the right subtree.
*
* Create the pipeline. We first post the first receive, then in the loop we
- * post the next receive and after that wait for the previous receive to complete
+ * post the next receive and after that wait for the previous receive to complete
* and we disseminating the data to all children.
*/
sendcount[lr] = segcount[lr];
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
/* determine how many elements to expect in this round */
- if( segindex == (num_segments[lr] - 1))
+ if( segindex == (num_segments[lr] - 1))
sendcount[lr] = counts[lr] - segindex*segcount[lr];
/* post new irecv */
new_req = Request::irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
} /* end of for each child */
/* upate the base request */
- base_req = new_req;
+ base_req = new_req;
/* go to the next buffer (ie. the one corresponding to the next recv) */
tmpbuf[lr] += realsegsize[lr];
} /* end of for segindex */
Request::send(tmpbuf[lr], sendcount[lr], datatype,
tree->tree_next[i], COLL_TAG_BCAST, comm);
} /* end of for each child */
- }
-
+ }
+
/* leaf nodes */
- else {
+ else {
/* Just consume segments as fast as possible */
sendcount[lr] = segcount[lr];
for (segindex = 0; segindex < num_segments[lr]; segindex++) {
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
/* Step 2:
- Find your immediate pair (identical node in opposite subtree) and SendRecv
+ Find your immediate pair (identical node in opposite subtree) and SendRecv
data buffer with them.
- The tree building function ensures that
+ The tree building function ensures that
if (we are not root)
if we are in the left subtree (lr == 0) our pair is (rank+1)%size.
if we are in the right subtree (lr == 1) our pair is (rank-1)%size
pair = (rank+size-1)%size;
}
- if ( (size%2) != 0 && rank != root) {
+ if ( (size%2) != 0 && rank != root) {
Request::sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, COLL_TAG_BCAST,
Request::send(tmpbuf[1], counts[1], datatype,
(root+size-1)%size, COLL_TAG_BCAST, comm);
- }
+ }
/* last node receives right buffer from the root */
else if (rank == (root+size-1)%size) {
Request::recv(tmpbuf[1], counts[1], datatype,
root, COLL_TAG_BCAST,
comm, MPI_STATUS_IGNORE);
- }
+ }
/* everyone else exchanges buffers */
else {
Request::sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, COLL_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, COLL_TAG_BCAST,
- comm, MPI_STATUS_IGNORE);
+ comm, MPI_STATUS_IGNORE);
}
}
xbt_free(tree);
return (MPI_SUCCESS);
-
+
}
nbytes = extent * count;
- scatter_size = (nbytes + num_procs - 1) / num_procs; // ceiling division
+ scatter_size = (nbytes + num_procs - 1) / num_procs; // ceiling division
curr_size = (rank == root) ? nbytes : 0; // root starts with all the data
relative_rank = (rank >= root) ? rank - root : rank - root + num_procs;
// allows you to post a larger recv.
if (recv_size <= 0)
curr_size = 0; // this process doesn't receive any data
- // because of uneven division
+ // because of uneven division
else {
Request::recv((char *) buff + relative_rank * scatter_size, recv_size,
MPI_BYTE, src, tag, comm, &status);
while (mask > 0) {
if (relative_rank + mask < num_procs) {
send_size = curr_size - scatter_size * mask;
- // mask is also the size of this process's subtree
+ // mask is also the size of this process's subtree
if (send_size > 0) {
dst = rank + mask;
processes may not get any data. For example if bufsize = 97 and
nprocs = 16, ranks 15 and 16 will get 0 data. On each process, the
scattered data is stored at the same offset in the buffer as it is
- on the root process. */
+ on the root process. */
scatter_size = (nbytes + comm_size - 1)/comm_size; /* ceiling division */
curr_size = (rank == root) ? nbytes : 0; /* root starts with all the
{
if (relative_rank & mask)
{
- src = rank - mask;
+ src = rank - mask;
if (src < 0) src += comm_size;
recv_size = nbytes - relative_rank*scatter_size;
/* recv_size is larger than what might actually be sent by the
sender. We don't need compute the exact value because MPI
- allows you to post a larger recv.*/
+ allows you to post a larger recv.*/
if (recv_size <= 0)
{
curr_size = 0; /* this process doesn't receive any data
{
if (relative_rank + mask < comm_size)
{
- send_size = curr_size - scatter_size * mask;
+ send_size = curr_size - scatter_size * mask;
/* mask is also the size of this process's subtree */
if (send_size > 0)
int
Coll_bcast_scatter_rdb_allgather::bcast (
- void *buffer,
- int count,
- MPI_Datatype datatype,
- int root,
+ void *buffer,
+ int count,
+ MPI_Datatype datatype,
+ int root,
MPI_Comm comm)
{
MPI_Status status;
{
relative_dst = relative_rank ^ mask;
- dst = (relative_dst + root) % comm_size;
+ dst = (relative_dst + root) % comm_size;
/* find offset into send and recv buffers.
zero out the least significant "i" bits of relative_rank and
relative_dst to find root of src and dst
subtrees. Use ranks of roots as index to send from
- and recv into buffer */
+ and recv into buffer */
dst_tree_root = relative_dst >> i;
dst_tree_root <<= i;
if (relative_dst < comm_size)
{
Request::sendrecv(((char *)tmp_buf + send_offset),
- curr_size, MPI_BYTE, dst, COLL_TAG_BCAST,
+ curr_size, MPI_BYTE, dst, COLL_TAG_BCAST,
((char *)tmp_buf + recv_offset),
- (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset),
+ (nbytes-recv_offset < 0 ? 0 : nbytes-recv_offset),
MPI_BYTE, dst, COLL_TAG_BCAST, comm, &status);
recv_size=Status::get_count(&status, MPI_BYTE);
curr_size += recv_size;
/* This part of the code will not currently be
executed because we are not using recursive
doubling for non power of two. Mark it as experimental
- so that it doesn't show up as red in the coverage tests. */
+ so that it doesn't show up as red in the coverage tests. */
/* --BEGIN EXPERIMENTAL-- */
if (dst_tree_root + mask > comm_size)
in a tree fashion. First find root of current tree
that is being divided into two. k is the number of
least-significant bits in this process's rank that
- must be zeroed out to find the rank of the root */
+ must be zeroed out to find the rank of the root */
j = mask;
k = 0;
while (j)
while (tmp_mask)
{
relative_dst = relative_rank ^ tmp_mask;
- dst = (relative_dst + root) % comm_size;
+ dst = (relative_dst + root) % comm_size;
tree_root = relative_rank >> k;
tree_root <<= k;
/* send only if this proc has data and destination
doesn't have data. */
- /* if (rank == 3) {
+ /* if (rank == 3) {
printf("rank %d, dst %d, root %d, nprocs_completed %d\n", relative_rank, relative_dst, tree_root, nprocs_completed);
fflush(stdout);
}*/
- if ((relative_dst > relative_rank) &&
+ if ((relative_dst > relative_rank) &&
(relative_rank < tree_root + nprocs_completed)
&& (relative_dst >= tree_root + nprocs_completed))
{
}
/* recv only if this proc. doesn't have data and sender
has data */
- else if ((relative_dst < relative_rank) &&
+ else if ((relative_dst < relative_rank) &&
(relative_dst < tree_root + nprocs_completed) &&
(relative_rank >= tree_root + nprocs_completed))
{
/* printf("Rank %d waiting to recv from rank %d\n",
relative_rank, dst); */
Request::recv(((char *)tmp_buf + offset),
- nbytes - offset,
+ nbytes - offset,
MPI_BYTE, dst, COLL_TAG_BCAST,
comm, &status);
/* nprocs_completed is also equal to the no. of processes
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
- *
+ *
* Additional copyrights may follow
*/
int schild, sparent;
int level; /* location of my rank in the tree structure of size */
int delta; /* number of nodes on my level */
- int slimit; /* total number of nodes on levels above me */
+ int slimit; /* total number of nodes on levels above me */
int shiftedrank;
int i;
ompi_coll_tree_t* tree;
return NULL;
}
- /*
- * Get size and rank of the process in this communicator
+ /*
+ * Get size and rank of the process in this communicator
*/
size = comm->size();
rank = comm->rank();
* Set root
*/
tree->tree_root = root;
-
- /*
+
+ /*
* Initialize tree
*/
tree->tree_fanout = fanout;
if( size < 2 ) {
return tree;
}
-
+
/*
- * Shift all ranks by root, so that the algorithm can be
+ * Shift all ranks by root, so that the algorithm can be
* designed as if root would be always 0
- * shiftedrank should be used in calculating distances
+ * shiftedrank should be used in calculating distances
* and position in tree
*/
shiftedrank = rank - root;
break;
}
}
-
+
/* find my parent */
slimit = calculate_num_nodes_up_to_level( fanout, level );
sparent = shiftedrank;
}
}
tree->tree_prev = (sparent+root)%size;
-
+
return tree;
}
/*
- * Constructs in-order binary tree which can be used for non-commutative reduce
+ * Constructs in-order binary tree which can be used for non-commutative reduce
* operations.
* Root of this tree is always rank (size-1) and fanout is 2.
* Here are some of the examples of this tree:
int parent, lchild, rchild;
ompi_coll_tree_t* tree;
- /*
- * Get size and rank of the process in this communicator
+ /*
+ * Get size and rank of the process in this communicator
*/
size = comm->size();
rank = comm->rank();
tree->tree_root = MPI_UNDEFINED;
tree->tree_nextsize = MPI_UNDEFINED;
- /*
+ /*
* Initialize tree
*/
tree->tree_fanout = 2;
tree->tree_next[0] = -1;
tree->tree_next[1] = -1;
XBT_DEBUG(
- "coll:tuned:topo_build_in_order_tree Building fo %d rt %d",
+ "coll:tuned:topo_build_in_order_tree Building fo %d rt %d",
tree->tree_fanout, tree->tree_root);
- /*
+ /*
* Build the tree
*/
myrank = rank;
rchild = -1;
if (size - 1 > 0) {
lchild = parent - 1;
- if (lchild > 0) {
+ if (lchild > 0) {
rchild = rightsize - 1;
}
}
-
- /* The following cases are possible: myrank can be
+
+ /* The following cases are possible: myrank can be
- a parent,
- belong to the left subtree, or
- belong to the right subtee
Each of the cases need to be handled differently.
*/
-
+
if (myrank == parent) {
/* I am the parent:
- compute real ranks of my children, and exit the loop. */
if (myrank > rchild) {
/* I belong to the left subtree:
- If I am the left child, compute real rank of my parent
- - Iterate down through tree:
+ - Iterate down through tree:
compute new size, shift ranks down, and update delta.
*/
if (myrank == lchild) {
} else {
/* I belong to the right subtree:
- If I am the right child, compute real rank of my parent
- - Iterate down through tree:
- compute new size and parent,
+ - Iterate down through tree:
+ compute new size and parent,
but the delta and rank do not need to change.
*/
if (myrank == rchild) {
parent = rchild;
}
}
-
+
if (tree->tree_next[0] >= 0) { tree->tree_nextsize = 1; }
if (tree->tree_next[1] >= 0) { tree->tree_nextsize += 1; }
}
/*
- *
+ *
* Here are some of the examples of this tree:
* size == 2 size = 4 size = 8
* 0 0 0
XBT_DEBUG("coll:tuned:topo:build_bmtree rt %d", root);
- /*
- * Get size and rank of the process in this communicator
+ /*
+ * Get size and rank of the process in this communicator
*/
size = comm->size();
rank = comm->rank();
/*
* Constructs in-order binomial tree which can be used for gather/scatter
* operations.
- *
+ *
* Here are some of the examples of this tree:
* size == 2 size = 4 size = 8
* 0 0 0
XBT_DEBUG("coll:tuned:topo:build_in_order_bmtree rt %d", root);
- /*
- * Get size and rank of the process in this communicator
+ /*
+ * Get size and rank of the process in this communicator
*/
size = comm->size();
rank = comm->rank();
XBT_DEBUG("coll:tuned:topo:build_chain fo %d rt %d", fanout, root);
- /*
- * Get size and rank of the process in this communicator
+ /*
+ * Get size and rank of the process in this communicator
*/
size = comm->size();
rank = comm->rank();
}
/*
- * Allocate space for topology arrays if needed
+ * Allocate space for topology arrays if needed
*/
chain = (ompi_coll_tree_t*)malloc( sizeof(ompi_coll_tree_t) );
if (not chain) {
chain->tree_nextsize = -1;
for(i=0;i<fanout;i++) chain->tree_next[i] = -1;
- /*
+ /*
* Set root & numchain
*/
chain->tree_root = root;
- if( (size - 1) < fanout ) {
+ if( (size - 1) < fanout ) {
chain->tree_nextsize = size-1;
fanout = size-1;
} else {
chain->tree_nextsize = fanout;
}
-
+
/*
* Shift ranks
*/
chain->tree_nextsize = 1;
} else {
chain->tree_next[0] = -1;
- chain->tree_nextsize = 0;
+ chain->tree_nextsize = 0;
}
}
}
-
+
/*
- * Unshift values
+ * Unshift values
*/
if( rank == root ) {
chain->tree_prev = -1;
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
- *
+ *
* Additional copyrights may follow
*/
int recvcnt,
MPI_Datatype recvtype,
int root, MPI_Comm comm);
-
+
extern MV2_Gather_function_ptr MV2_Gather_inter_leader_function;
extern MV2_Gather_function_ptr MV2_Gather_intra_node_function;
* (shmem_comm or intra_sock_comm or
* inter-sock_leader_comm)
* intra_node_fn_ptr - (in) Function ptr to choose the
- * intra node gather function
+ * intra node gather function
* errflag - (out) to record errors
*/
static int MPIR_pt_pt_intra_gather( void *sendbuf, int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, int rank,
+ int root, int rank,
void *tmp_buf, int nbytes,
int is_data_avail,
- MPI_Comm comm,
+ MPI_Comm comm,
MV2_Gather_function_ptr intra_node_fn_ptr)
{
int mpi_errno = MPI_SUCCESS;
recvtype->extent(&true_lb,
&recvtype_true_extent);
}
-
+
/* Special case, when tmp_buf itself has data */
if (rank == root && sendbuf == MPI_IN_PLACE && is_data_avail) {
-
+
mpi_errno = intra_node_fn_ptr(MPI_IN_PLACE,
sendcnt, sendtype, tmp_buf, nbytes,
MPI_BYTE, 0, comm);
MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0;
MPI_Comm shmem_comm, leader_comm;
void* tmp_buf = NULL;
-
+
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Gather_intra_node_function==NULL)
MV2_Gather_intra_node_function= Coll_gather_mpich::gather;
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
shmem_comm = comm->get_intra_comm();
local_rank = shmem_comm->rank();
local_size = shmem_comm->size();
-
+
if (local_rank == 0) {
/* Node leader. Extract the rank, size information for the leader
* communicator */
}
#if defined(_SMP_LIMIC_)
- if((g_use_limic2_coll) && (shmem_commptr->ch.use_intra_sock_comm == 1)
+ if((g_use_limic2_coll) && (shmem_commptr->ch.use_intra_sock_comm == 1)
&& (use_limic_gather)
- &&((num_scheme == USE_GATHER_PT_PT_BINOMIAL)
+ &&((num_scheme == USE_GATHER_PT_PT_BINOMIAL)
|| (num_scheme == USE_GATHER_PT_PT_DIRECT)
- ||(num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)
+ ||(num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)
|| (num_scheme == USE_GATHER_PT_LINEAR_DIRECT)
|| (num_scheme == USE_GATHER_LINEAR_PT_BINOMIAL)
|| (num_scheme == USE_GATHER_LINEAR_PT_DIRECT)
|| (num_scheme == USE_GATHER_LINEAR_LINEAR)
|| (num_scheme == USE_GATHER_SINGLE_LEADER))) {
-
+
mpi_errno = MV2_Gather_intra_node_function(sendbuf, sendcnt, sendtype,
- recvbuf, recvcnt,recvtype,
+ recvbuf, recvcnt,recvtype,
root, comm);
} else
-#endif/*#if defined(_SMP_LIMIC_)*/
+#endif/*#if defined(_SMP_LIMIC_)*/
{
if (local_rank == 0) {
/* Node leader, allocate tmp_buffer */
}
/*while testing mpich2 gather test, we see that
* which basically splits the comm, and we come to
- * a point, where use_intra_sock_comm == 0, but if the
+ * a point, where use_intra_sock_comm == 0, but if the
* intra node function is MPIR_Intra_node_LIMIC_Gather_MV2,
- * it would use the intra sock comm. In such cases, we
+ * it would use the intra sock comm. In such cases, we
* fallback to binomial as a default case.*/
-#if defined(_SMP_LIMIC_)
+#if defined(_SMP_LIMIC_)
if(*MV2_Gather_intra_node_function == MPIR_Intra_node_LIMIC_Gather_MV2) {
mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
recvbuf, recvcnt, recvtype,
- root, rank,
- tmp_buf, nbytes,
+ root, rank,
+ tmp_buf, nbytes,
TEMP_BUF_HAS_NO_DATA,
shmem_commptr,
MPIR_Gather_intra);
* local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
recvbuf, recvcnt, recvtype,
- root, rank,
- tmp_buf, nbytes,
+ root, rank,
+ tmp_buf, nbytes,
TEMP_BUF_HAS_NO_DATA,
shmem_comm,
MV2_Gather_intra_node_function
int* leaders_map = comm->get_leaders_map();
leader_of_root = comm->group()->rank(leaders_map[root]);
leader_root = leader_comm->group()->rank(leaders_map[root]);
- /* leader_root is the rank of the leader of the root in leader_comm.
- * leader_root is to be used as the root of the inter-leader gather ops
+ /* leader_root is the rank of the leader of the root in leader_comm.
+ * leader_root is to be used as the root of the inter-leader gather ops
*/
if (not comm->is_uniform()) {
if (local_rank == 0) {
}
}
} else {
- /* All nodes have the same number of processes.
- * Just do one Gather to get all
+ /* All nodes have the same number of processes.
+ * Just do one Gather to get all
* the data at the leader of the root process */
if (local_rank == 0) {
if (leader_comm_rank == leader_root && root != leader_of_root) {
recvcnt * local_size,
recvtype, leader_root,
leader_comm);
-
+
} else {
mpi_errno = MPIR_Gather_MV2_Direct(tmp_buf, nbytes * local_size,
MPI_BYTE, leader_gather_buf,
int err;
ompi_coll_tree_t* bmtree;
MPI_Status status;
- MPI_Aint sextent, slb, strue_lb, strue_extent;
+ MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
- int root,
+ int root,
MPI_Comm comm)
{
int i;
int first_segment_size=0;
size = comm->size();
rank = comm->rank();
-
+
size_t dsize, block_size;
if (rank == root) {
dsize= rdtype->size();
dsize=sdtype->size();
block_size = dsize * scount;
}
-
+
if (block_size > 92160){
first_segment_size = 32768;
}else{
ret = -1;
line = __LINE__;
goto error_hndl; }
-
+
typelng=rdtype->size();
rdtype->extent(&lb, &extent);
first_segment_count = rcount;
- COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
+ COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
first_segment_count );
for (i = 0; i < size; ++i) {
- if (i == rank) {
+ if (i == rank) {
/* skip myself */
- reqs[i] = MPI_REQUEST_NULL;
- continue;
- }
+ reqs[i] = MPI_REQUEST_NULL;
+ continue;
+ }
/* irecv for the first segment from i */
ptmp = (char*)rbuf + i * rcount * extent;
first_segment_req = Request::irecv(ptmp, first_segment_count, rdtype, i,
COLL_TAG_GATHER, comm
);
-
+
/* send sync message */
Request::send(rbuf, 0, MPI_BYTE, i,
COLL_TAG_GATHER,
/* irecv for the second segment */
ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent;
- reqs[i]=Request::irecv(ptmp, (rcount - first_segment_count),
+ reqs[i]=Request::irecv(ptmp, (rcount - first_segment_count),
rdtype, i, COLL_TAG_GATHER, comm
);
/* copy local data if necessary */
if (MPI_IN_PLACE != sbuf) {
ret = Datatype::copy(sbuf, scount, sdtype,
- (char*)rbuf + rank * rcount * extent,
+ (char*)rbuf + rank * rcount * extent,
rcount, rdtype);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
-
+
/* wait all second segments to complete */
ret = Request::waitall(size, reqs, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
return MPI_SUCCESS;
error_hndl:
- XBT_DEBUG(
- "ERROR_HNDL: node %d file %s line %d error %d\n",
+ XBT_DEBUG(
+ "ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret );
return ret;
}
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
- * but for some small number of nodes and/or small data sizes they
- * are just as fast as tuned/tree based segmenting operations
+ * but for some small number of nodes and/or small data sizes they
+ * are just as fast as tuned/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
int reduce_NTSL_segment_size_in_byte = 8192;
-/* Non-topology-specific pipelined linear-bcast function
+/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
namespace simgrid{
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
Request::send(buf,count,datatype,0,tag,comm);
}
else if (rank == 0) {
- Request::recv(buf,count,datatype,root,tag,comm,&status);
+ Request::recv(buf,count,datatype,root,tag,comm,&status);
}
}
*/
/* use for buffer offset for sending and receiving data = segment size in byte */
int increment = segment * extent;
- /* if the input size is not divisible by segment size =>
+ /* if the input size is not divisible by segment size =>
the small remainder will be done with native implementation */
int remainder = count % segment;
tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
int is_commutative = (op==MPI_OP_NULL || op->is_commutative());
mask = 1;
-
+
int lroot;
- if (is_commutative)
+ if (is_commutative)
lroot = root;
else
lroot = 0;
/* adjust for potential negative lower bound in datatype */
tmp_buf = (void *)((char*)tmp_buf - true_lb);
-
+
/* If I'm not the root, then my recvbuf may not be valid, therefore
I have to allocate a temporary one */
if (rank != root) {
if (source < comm_size) {
source = (source + lroot) % comm_size;
Request::recv(tmp_buf, count, datatype, source, tag, comm, &status);
-
+
if (is_commutative) {
if(op!=MPI_OP_NULL) op->apply( tmp_buf, recvbuf, &count, datatype);
} else {
* copyright file COPYRIGHT in the top level MVAPICH2 directory.
*
*/
-
+
#include "../colls_private.h"
extern int mv2_reduce_intra_knomial_factor;
extern int mv2_reduce_inter_knomial_factor;
#define SMPI_DEFAULT_KNOMIAL_FACTOR 4
// int mv2_reduce_knomial_factor = 2;
-
-
-
-static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor,
+
+
+
+static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor,
MPI_Comm comm, int *dst, int *expected_send_count,
int *expected_recv_count, int **src_array)
{
int mask=0x1, k, comm_size, src, rank, relative_rank, lroot=0;
- int orig_mask=0x1;
+ int orig_mask=0x1;
int recv_iter=0, send_iter=0;
int *knomial_reduce_src_array=NULL;
comm_size = comm->size();
lroot = root;
relative_rank = (rank - lroot + comm_size) % comm_size;
- /* First compute to whom we need to send data */
+ /* First compute to whom we need to send data */
while (mask < comm_size) {
if (relative_rank % (reduce_knomial_factor*mask)) {
*dst = relative_rank/(reduce_knomial_factor*mask)*
}
mask /= reduce_knomial_factor;
- /* Now compute how many children we have in the knomial-tree */
- orig_mask = mask;
+ /* Now compute how many children we have in the knomial-tree */
+ orig_mask = mask;
while (mask > 0) {
for(k=1;k<reduce_knomial_factor;k++) {
if (relative_rank + mask*k < comm_size) {
mask /= reduce_knomial_factor;
}
- /* Finally, fill up the src array */
- if(recv_iter > 0) {
+ /* Finally, fill up the src array */
+ if(recv_iter > 0) {
knomial_reduce_src_array = static_cast<int*>(smpi_get_tmp_sendbuffer(sizeof(int)*recv_iter));
- }
+ }
- mask = orig_mask;
- recv_iter=0;
+ mask = orig_mask;
+ recv_iter=0;
while (mask > 0) {
for(k=1;k<reduce_knomial_factor;k++) {
if (relative_rank + mask*k < comm_size) {
*expected_recv_count = recv_iter;
*expected_send_count = send_iter;
- *src_array = knomial_reduce_src_array;
- return 0;
+ *src_array = knomial_reduce_src_array;
+ return 0;
}
-
+
namespace simgrid{
namespace smpi{
int Coll_reduce_mvapich2_knomial::reduce (
MPI_Request send_request;
int index=0;
MPI_Aint true_lb, true_extent, extent;
- MPI_Status status;
+ MPI_Status status;
int recv_iter=0, dst=-1, expected_send_count, expected_recv_count;
int *src_array=NULL;
void **tmp_buf=NULL;
}
- MPIR_Reduce_knomial_trace(root, mv2_reduce_intra_knomial_factor, comm,
+ MPIR_Reduce_knomial_trace(root, mv2_reduce_intra_knomial_factor, comm,
&dst, &expected_send_count, &expected_recv_count, &src_array);
if(expected_recv_count > 0 ) {
xbt_free(requests);
}
- if(src_array != NULL) {
+ if(src_array != NULL) {
xbt_free(src_array);
- }
+ }
if(rank != root) {
send_request=Request::isend(recvbuf,count, datatype, dst,
void *in_buf = NULL, *out_buf = NULL, *tmp_buf = NULL;
MPI_Aint true_lb, true_extent, extent;
int is_commutative = 0, stride = 0;
- int intra_node_root=0;
-
+ int intra_node_root=0;
+
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Reduce_function==NULL)
MV2_Reduce_function=Coll_reduce_mpich::reduce;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
my_rank = comm->rank();
total_size = comm->size();
shmem_comm = comm->get_intra_comm();
local_rank = shmem_comm->rank();
local_size = shmem_comm->size();
-
+
leader_comm = comm->get_leaders_comm();
int* leaders_map = comm->get_leaders_map();
leader_of_root = comm->group()->rank(leaders_map[root]);
in_buf = recvbuf;
}
- if (local_rank == 0) {
+ if (local_rank == 0) {
if( my_rank != root) {
out_buf = tmp_buf;
- } else {
- out_buf = recvbuf;
- if(in_buf == out_buf) {
- in_buf = MPI_IN_PLACE;
- out_buf = recvbuf;
- }
- }
+ } else {
+ out_buf = recvbuf;
+ if(in_buf == out_buf) {
+ in_buf = MPI_IN_PLACE;
+ out_buf = recvbuf;
+ }
+ }
} else {
- in_buf = (void *)sendbuf;
+ in_buf = (void *)sendbuf;
out_buf = NULL;
}
MPI_STATUS_IGNORE);
}
} else {
- if(mv2_use_knomial_reduce == 1) {
- reduce_fn = &MPIR_Reduce_intra_knomial_wrapper_MV2;
- } else {
- reduce_fn = &MPIR_Reduce_binomial_MV2;
- }
+ if(mv2_use_knomial_reduce == 1) {
+ reduce_fn = &MPIR_Reduce_intra_knomial_wrapper_MV2;
+ } else {
+ reduce_fn = &MPIR_Reduce_binomial_MV2;
+ }
mpi_errno = reduce_fn(sendbuf, recvbuf, count,
datatype, op,
root, comm);
}
/* We are done */
- if(tmp_buf!=NULL)
+ if(tmp_buf!=NULL)
smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
goto fn_exit;
}
-
+
if (local_rank == 0) {
leader_comm = comm->get_leaders_comm();
}
- if(local_size > 1) {
+ if(local_size > 1) {
/* Lets do the intra-node reduce operations, if we have more than one
* process in the node */
/*Fix the input and outbuf buffers for the intra-node reduce.
- *Node leaders will have the reduced data in tmp_buf after
+ *Node leaders will have the reduced data in tmp_buf after
*this step*/
if (MV2_Reduce_intra_function == & MPIR_Reduce_shmem_MV2)
{
datatype, op,
intra_node_root, shmem_comm);
}
- } else {
+ } else {
smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
- tmp_buf = in_buf;
- }
+ tmp_buf = in_buf;
+ }
/* Now work on the inter-leader phase. Data is in tmp_buf */
if (local_rank == 0 && leader_comm_size > 1) {
- /*The leader of root will have the global reduced data in tmp_buf
+ /*The leader of root will have the global reduced data in tmp_buf
or recv_buf
at the end of the reduce */
if (leader_comm_rank == leader_root) {
if (my_rank == root) {
- /* I am the root of the leader-comm, and the
- * root of the reduce op. So, I will write the
+ /* I am the root of the leader-comm, and the
+ * root of the reduce op. So, I will write the
* final result directly into my recvbuf */
- if(tmp_buf != recvbuf) {
+ if(tmp_buf != recvbuf) {
in_buf = tmp_buf;
out_buf = recvbuf;
- } else {
+ } else {
in_buf = (char *)smpi_get_tmp_sendbuffer(count*
datatype->get_extent());
Datatype::copy(tmp_buf, count, datatype,
in_buf, count, datatype);
- //in_buf = MPI_IN_PLACE;
- out_buf = recvbuf;
- }
+ //in_buf = MPI_IN_PLACE;
+ out_buf = recvbuf;
+ }
} else {
in_buf = (char *)smpi_get_tmp_sendbuffer(count*
datatype->get_extent());
smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
if (leader_comm_rank == leader_root) {
- if (my_rank != root || (my_rank == root && tmp_buf == recvbuf)) {
+ if (my_rank != root || (my_rank == root && tmp_buf == recvbuf)) {
smpi_free_tmp_buffer(in_buf);
}
}
* the number of datatype to the original count (original_count)
*
* Note that for non-commutative operations we cannot save memory copy
- * for the first block: thus we must copy sendbuf to accumbuf on intermediate
+ * for the first block: thus we must copy sendbuf to accumbuf on intermediate
* to keep the optimized loop happy.
*/
int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
segment_increment = count_by_segment * extent;
- sendtmpbuf = (char*) sendbuf;
- if( sendbuf == MPI_IN_PLACE ) {
- sendtmpbuf = (char *)recvbuf;
+ sendtmpbuf = (char*) sendbuf;
+ if( sendbuf == MPI_IN_PLACE ) {
+ sendtmpbuf = (char *)recvbuf;
}
XBT_DEBUG( "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)(num_segments * segment_increment), (unsigned long)segment_increment, max_outstanding_reqs);
rank = comm->rank();
- /* non-leaf nodes - wait for children to send me data & forward up
+ /* non-leaf nodes - wait for children to send me data & forward up
(if needed) */
if( tree->tree_nextsize > 0 ) {
ptrdiff_t true_extent, real_segment_size;
true_extent=datatype->get_extent();
- /* handle non existant recv buffer (i.e. its NULL) and
+ /* handle non existant recv buffer (i.e. its NULL) and
protect the recv buffer on non-root nodes */
accumbuf = (char*)recvbuf;
if( (NULL == accumbuf) || (root != rank) ) {
/* Allocate temporary accumulator buffer. */
accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent +
(original_count - 1) * extent);
- if (accumbuf_free == NULL) {
- line = __LINE__; ret = -1; goto error_hndl;
+ if (accumbuf_free == NULL) {
+ line = __LINE__; ret = -1; goto error_hndl;
}
accumbuf = accumbuf_free - lower_bound;
- }
+ }
/* If this is a non-commutative operation we must copy
sendbuf to the accumbuf, in order to simplfy the loops */
/* Allocate two buffers for incoming segments */
real_segment_size = true_extent + (count_by_segment - 1) * extent;
inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
- if( inbuf_free[0] == NULL ) {
- line = __LINE__; ret = -1; goto error_hndl;
+ if( inbuf_free[0] == NULL ) {
+ line = __LINE__; ret = -1; goto error_hndl;
}
inbuf[0] = inbuf_free[0] - lower_bound;
/* if there is chance to overlap communication -
allocate second buffer */
if( (num_segments > 1) || (tree->tree_nextsize > 1) ) {
inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
- if( inbuf_free[1] == NULL ) {
+ if( inbuf_free[1] == NULL ) {
line = __LINE__; ret = -1; goto error_hndl;
}
inbuf[1] = inbuf_free[1] - lower_bound;
- }
+ }
/* reset input buffer index and receive count */
inbi = 0;
if( segindex < num_segments ) {
void* local_recvbuf = inbuf[inbi];
if( 0 == i ) {
- /* for the first step (1st child per segment) and
- * commutative operations we might be able to irecv
- * directly into the accumulate buffer so that we can
- * reduce(op) this with our sendbuf in one step as
- * ompi_op_reduce only has two buffer pointers,
+ /* for the first step (1st child per segment) and
+ * commutative operations we might be able to irecv
+ * directly into the accumulate buffer so that we can
+ * reduce(op) this with our sendbuf in one step as
+ * ompi_op_reduce only has two buffer pointers,
* this avoids an extra memory copy.
*
- * BUT if the operation is non-commutative or
+ * BUT if the operation is non-commutative or
* we are root and are USING MPI_IN_PLACE this is wrong!
*/
if( (op==MPI_OP_NULL || op->is_commutative()) &&
}
reqs[inbi]=Request::irecv(local_recvbuf, recvcount, datatype,
- tree->tree_next[i],
+ tree->tree_next[i],
COLL_TAG_REDUCE, comm
);
}
/* wait for previous req to complete, if any.
- if there are no requests reqs[inbi ^1] will be
+ if there are no requests reqs[inbi ^1] will be
MPI_REQUEST_NULL. */
/* wait on data from last child for previous segment */
- Request::waitall( 1, &reqs[inbi ^ 1],
+ Request::waitall( 1, &reqs[inbi ^ 1],
MPI_STATUSES_IGNORE );
local_op_buffer = inbuf[inbi ^ 1];
if( i > 0 ) {
- /* our first operation is to combine our own [sendbuf] data
- * with the data we recvd from down stream (but only
- * the operation is commutative and if we are not root and
+ /* our first operation is to combine our own [sendbuf] data
+ * with the data we recvd from down stream (but only
+ * the operation is commutative and if we are not root and
* not using MPI_IN_PLACE)
*/
if( 1 == i ) {
- if( (op==MPI_OP_NULL || op->is_commutative())&&
+ if( (op==MPI_OP_NULL || op->is_commutative())&&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + segindex * segment_increment;
}
}
/* apply operation */
- if(op!=MPI_OP_NULL) op->apply( local_op_buffer,
- accumbuf + segindex * segment_increment,
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer,
+ accumbuf + segindex * segment_increment,
&recvcount, datatype );
} else if ( segindex > 0 ) {
void* accumulator = accumbuf + (segindex-1) * segment_increment;
local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
}
}
- if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount,
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount,
datatype );
- /* all reduced on available data this step (i) complete,
+ /* all reduced on available data this step (i) complete,
* pass to the next process unless you are the root.
*/
if (rank != tree->tree_root) {
/* send combined/accumulated data to parent */
- Request::send( accumulator, prevcount,
- datatype, tree->tree_prev,
+ Request::send( accumulator, prevcount,
+ datatype, tree->tree_prev,
COLL_TAG_REDUCE,
comm);
}
- /* we stop when segindex = number of segments
+ /* we stop when segindex = number of segments
(i.e. we do num_segment+1 steps for pipelining */
if (segindex == num_segments) break;
}
smpi_free_tmp_buffer(accumbuf_free);
}
- /* leaf nodes
- Depending on the value of max_outstanding_reqs and
+ /* leaf nodes
+ Depending on the value of max_outstanding_reqs and
the number of segments we have two options:
- send all segments using blocking send to the parent, or
- - avoid overflooding the parent nodes by limiting the number of
+ - avoid overflooding the parent nodes by limiting the number of
outstanding requests to max_oustanding_reqs.
- TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size
- for the current communication, synchronization should be used only
+ TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size
+ for the current communication, synchronization should be used only
when the message/segment size is smaller than the eager size.
*/
else {
/* If the number of segments is less than a maximum number of oustanding
- requests or there is no limit on the maximum number of outstanding
+ requests or there is no limit on the maximum number of outstanding
requests, we send data to the parent using blocking send */
- if ((0 == max_outstanding_reqs) ||
+ if ((0 == max_outstanding_reqs) ||
(num_segments <= max_outstanding_reqs)) {
-
+
segindex = 0;
while ( original_count > 0) {
if (original_count < count_by_segment) {
count_by_segment = original_count;
}
- Request::send((char*)sendbuf +
+ Request::send((char*)sendbuf +
segindex * segment_increment,
count_by_segment, datatype,
- tree->tree_prev,
+ tree->tree_prev,
COLL_TAG_REDUCE,
comm) ;
segindex++;
sreq[segindex]=Request::isend((char*)sendbuf +
segindex * segment_increment,
count_by_segment, datatype,
- tree->tree_prev,
+ tree->tree_prev,
COLL_TAG_REDUCE,
comm);
original_count -= count_by_segment;
if( original_count < count_by_segment ) {
count_by_segment = original_count;
}
- sreq[creq]=Request::isend((char*)sendbuf +
- segindex * segment_increment,
- count_by_segment, datatype,
- tree->tree_prev,
+ sreq[creq]=Request::isend((char*)sendbuf +
+ segindex * segment_increment,
+ count_by_segment, datatype,
+ tree->tree_prev,
COLL_TAG_REDUCE,
comm );
creq = (creq + 1) % max_outstanding_reqs;
}
/* Wait on the remaining request to complete */
- Request::waitall( max_outstanding_reqs, sreq,
+ Request::waitall( max_outstanding_reqs, sreq,
MPI_STATUSES_IGNORE );
/* free requests */
return MPI_SUCCESS;
error_hndl: /* error handler */
- XBT_DEBUG("ERROR_HNDL: node %d file %s line %d error %d\n",
+ XBT_DEBUG("ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret );
if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
if( inbuf_free[1] != NULL ) free(inbuf_free[1]);
int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count,
- MPI_Datatype datatype,
- MPI_Op op, int root,
+ MPI_Datatype datatype,
+ MPI_Op op, int root,
MPI_Comm comm
)
{
* sent per operation
*/
typelng = datatype->size();
-
+
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
- return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+ return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm,
- ompi_coll_tuned_topo_build_chain(fanout, comm, root),
+ ompi_coll_tuned_topo_build_chain(fanout, comm, root),
segcount, 0 );
}
const double b4 = 1.6761;
typelng= datatype->size();
int communicator_size = comm->size();
- size_t message_size = typelng * count;
+ size_t message_size = typelng * count;
if (communicator_size > (a2 * message_size + b2)) {
- // Pipeline_1K
+ // Pipeline_1K
segsize = 1024;
}else if (communicator_size > (a4 * message_size + b4)) {
- // Pipeline_32K
+ // Pipeline_32K
segsize = 32*1024;
} else {
- // Pipeline_64K
+ // Pipeline_64K
segsize = 64*1024;
}
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
- return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+ return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm,
- ompi_coll_tuned_topo_build_chain( 1, comm, root),
+ ompi_coll_tuned_topo_build_chain( 1, comm, root),
segcount, 0);
}
*/
typelng=datatype->size();
- // Binary_32K
+ // Binary_32K
segsize = 32*1024;
XBT_DEBUG("coll:tuned:reduce_intra_binary rank %d ss %5d",
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
- return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
- op, root, comm,
- ompi_coll_tuned_topo_build_tree(2, comm, root),
+ return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+ op, root, comm,
+ ompi_coll_tuned_topo_build_tree(2, comm, root),
segcount, 0);
}
*/
typelng= datatype->size();
int communicator_size = comm->size();
- size_t message_size = typelng * count;
+ size_t message_size = typelng * count;
if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
segsize = 0;
} else if (communicator_size > (a1 * message_size + b1)) {
- // Binomial_1K
+ // Binomial_1K
segsize = 1024;
}
comm->rank(), segsize);
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
- return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
- op, root, comm,
- ompi_coll_tuned_topo_build_in_order_bmtree(comm, root),
+ return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+ op, root, comm,
+ ompi_coll_tuned_topo_build_in_order_bmtree(comm, root),
segcount, 0);
}
/*
- * reduce_intra_in_order_binary
- *
+ * reduce_intra_in_order_binary
+ *
* Function: Logarithmic reduce operation for non-commutative operations.
* Acecpts: same as MPI_Reduce()
* Returns: MPI_SUCCESS or error code
*/
int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
- int count,
+ int count,
MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm)
/* An in-order binary tree must use root (size-1) to preserve the order of
operations. Thus, if root is not rank (size - 1), then we must handle
- 1. MPI_IN_PLACE option on real root, and
+ 1. MPI_IN_PLACE option on real root, and
2. we must allocate temporary recvbuf on rank (size - 1).
- Note that generic function must be careful not to switch order of
+ Note that generic function must be careful not to switch order of
operations for non-commutative ops.
*/
io_root = size - 1;
if (io_root != root) {
ptrdiff_t text, ext;
char *tmpbuf = NULL;
-
+
ext=datatype->get_extent();
text=datatype->get_extent();
/* Use generic reduce with in-order binary tree topology and io_root */
ret = smpi_coll_tuned_ompi_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
- op, io_root, comm,
- ompi_coll_tuned_topo_build_in_order_bintree(comm),
+ op, io_root, comm,
+ ompi_coll_tuned_topo_build_in_order_bintree(comm),
segcount, 0 );
if (MPI_SUCCESS != ret) { return ret; }
if (MPI_IN_PLACE == sendbuf) {
smpi_free_tmp_buffer(use_this_sendbuf);
}
-
+
} else if (io_root == rank) {
/* Send result from use_this_recvbuf to root */
Request::send(use_this_recvbuf, count, datatype, root,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
- * but for some small number of nodes and/or small data sizes they
- * are just as fast as tuned/tree based segmenting operations
+ * but for some small number of nodes and/or small data sizes they
+ * are just as fast as tuned/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
return MPI_SUCCESS;
}
- /* see discussion in ompi_coll_basic_reduce_lin_intra about
+ /* see discussion in ompi_coll_basic_reduce_lin_intra about
extent and true extent */
/* for reducing buffer allocation lengths.... */
return 0;
rank = comm->rank();
comm_size = comm->size();
-
+
extent = datatype->get_extent();
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int rank, comm_size, i;
- MPI_Aint extent, true_extent, true_lb;
+ MPI_Aint extent, true_extent, true_lb;
int *disps;
void *tmp_recvbuf;
int mpi_errno = MPI_SUCCESS;
extent =datatype->get_extent();
datatype->extent(&true_lb, &true_extent);
-
+
if (op->is_commutative()) {
is_commutative = 1;
}
disps[i] = total_count;
total_count += recvcounts[i];
}
-
+
if (total_count == 0) {
xbt_free(disps);
return MPI_ERR_COUNT;
recvcounts[rank], datatype, recvbuf,
recvcounts[rank], datatype);
}
-
+
/* allocate temporary buffer to store incoming data */
tmp_recvbuf = (void*)smpi_get_tmp_recvbuffer(recvcounts[rank]*(MAX(true_extent,extent))+1);
/* adjust for potential negative lower bound in datatype */
tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
-
+
for (i=1; i<comm_size; i++) {
src = (rank - i + comm_size) % comm_size;
dst = (rank + i) % comm_size;
-
+
/* send the data that dst needs. recv data that this process
needs from src into tmp_recvbuf */
- if (sendbuf != MPI_IN_PLACE)
- Request::sendrecv(((char *)sendbuf+disps[dst]*extent),
+ if (sendbuf != MPI_IN_PLACE)
+ Request::sendrecv(((char *)sendbuf+disps[dst]*extent),
recvcounts[dst], datatype, dst,
COLL_TAG_SCATTER, tmp_recvbuf,
recvcounts[rank], datatype, src,
COLL_TAG_SCATTER, comm,
MPI_STATUS_IGNORE);
else
- Request::sendrecv(((char *)recvbuf+disps[dst]*extent),
+ Request::sendrecv(((char *)recvbuf+disps[dst]*extent),
recvcounts[dst], datatype, dst,
COLL_TAG_SCATTER, tmp_recvbuf,
recvcounts[rank], datatype, src,
COLL_TAG_SCATTER, comm,
MPI_STATUS_IGNORE);
-
+
if (is_commutative || (src < rank)) {
if (sendbuf != MPI_IN_PLACE) {
if (op != MPI_OP_NULL)
}
}
}
-
+
/* if MPI_IN_PLACE, move output data to the beginning of
recvbuf. already done for rank 0. */
if ((sendbuf == MPI_IN_PLACE) && (rank != 0)) {
mpi_errno = Datatype::copy(((char *)recvbuf +
- disps[rank]*extent),
+ disps[rank]*extent),
recvcounts[rank], datatype,
- recvbuf,
+ recvbuf,
recvcounts[rank], datatype );
if (mpi_errno) return(mpi_errno);
}
-
+
xbt_free(disps);
smpi_free_tmp_buffer(tmp_recvbuf);
return MPI_SUCCESS;
}
-
+
int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[],
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
is now our peer's responsibility */
if (rank > peer) {
/* higher ranked value so need to call op(received_data, my_data) */
- if(op!=MPI_OP_NULL) op->apply(
+ if(op!=MPI_OP_NULL) op->apply(
incoming_data + recv_offset*true_extent,
outgoing_data + recv_offset*true_extent,
&size, datatype );
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int rank, comm_size, i;
- MPI_Aint extent, true_extent, true_lb;
+ MPI_Aint extent, true_extent, true_lb;
int *disps;
void *tmp_recvbuf, *tmp_results;
int mpi_errno = MPI_SUCCESS;
extent =datatype->get_extent();
datatype->extent(&true_lb, &true_extent);
-
+
if ((op==MPI_OP_NULL) || op->is_commutative()) {
is_commutative = 1;
}
disps[i] = total_count;
total_count += recvcounts[i];
}
-
+
/* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
/* need to allocate temporary buffer to receive incoming data*/
mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &sendtype);
if (mpi_errno) return(mpi_errno);
-
+
sendtype->commit();
/* calculate recvtype */
mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &recvtype);
if (mpi_errno) return(mpi_errno);
-
+
recvtype->commit();
received = 0;
if (dst < comm_size) {
/* tmp_results contains data to be sent in each step. Data is
received in tmp_recvbuf and then accumulated into
- tmp_results. accumulation is done later below. */
+ tmp_results. accumulation is done later below. */
Request::sendrecv(tmp_results, 1, sendtype, dst,
COLL_TAG_SCATTER,
in a tree fashion. First find root of current tree
that is being divided into two. k is the number of
least-significant bits in this process's rank that
- must be zeroed out to find the rank of the root */
+ must be zeroed out to find the rank of the root */
j = mask;
k = 0;
while (j) {
/* send only if this proc has data and destination
doesn't have data. at any step, multiple processes
can send if they have the data */
- if ((dst > rank) &&
+ if ((dst > rank) &&
(rank < tree_root + nprocs_completed)
&& (dst >= tree_root + nprocs_completed)) {
/* send the current result */
}
/* recv only if this proc. doesn't have data and sender
has data */
- else if ((dst < rank) &&
+ else if ((dst < rank) &&
(dst < tree_root + nprocs_completed) &&
(rank >= tree_root + nprocs_completed)) {
Request::recv(tmp_recvbuf, 1, recvtype, dst,
COLL_TAG_SCATTER,
- comm, MPI_STATUS_IGNORE);
+ comm, MPI_STATUS_IGNORE);
received = 1;
}
tmp_mask >>= 1;
}
}
- /* The following reduction is done here instead of after
+ /* The following reduction is done here instead of after
the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
- because to do it above, in the noncommutative
+ because to do it above, in the noncommutative
case, we would need an extra temp buffer so as not to
overwrite temp_recvbuf, because temp_recvbuf may have
to be communicated to other processes in the
&blklens[1], datatype);
}
/* copy result back into tmp_results */
- mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype,
+ mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype,
tmp_results, 1, recvtype);
if (mpi_errno) return(mpi_errno);
}
/*
* Recursive-halving function is (*mostly*) copied from the BASIC coll module.
- * I have removed the part which handles "large" message sizes
+ * I have removed the part which handles "large" message sizes
* (non-overlapping version of reduce_Scatter).
*/
/*
* reduce_scatter_ompi_basic_recursivehalving
*
- * Function: - reduce scatter implementation using recursive-halving
+ * Function: - reduce scatter implementation using recursive-halving
* algorithm
* Accepts: - same as MPI_Reduce_scatter()
* Returns: - MPI_SUCCESS or error code
namespace simgrid{
namespace smpi{
int
-Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(void *sbuf,
- void *rbuf,
+Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(void *sbuf,
+ void *rbuf,
int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
char *recv_buf = NULL, *recv_buf_free = NULL;
char *result_buf = NULL, *result_buf_free = NULL;
-
+
/* Initialize */
rank = comm->rank();
size = comm->size();
-
+
XBT_DEBUG("coll:tuned:reduce_scatter_ompi_basic_recursivehalving, rank %d", rank);
if ((op != MPI_OP_NULL && not op->is_commutative()))
THROWF(arg_error,0, " reduce_scatter ompi_basic_recursivehalving can only be used for commutative operations! ");
err = MPI_ERR_OTHER;
goto cleanup;
}
-
+
/* allocate temporary buffer for results */
result_buf_free = (char*) smpi_get_tmp_sendbuffer(buf_size);
result_buf = result_buf_free - lb;
-
+
/* copy local buffer into the temporary results */
err =Datatype::copy(sbuf, count, dtype, result_buf, count, dtype);
if (MPI_SUCCESS != err) goto cleanup;
-
+
/* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of
two less than comm size */
while (tmp_size <= size) tmp_size <<= 1;
tmp_size >>= 1;
remain = size - tmp_size;
-
+
/* If comm size is not a power of two, have the first "remain"
procs with an even rank send to rank + 1, leaving a power of
two procs to do the rest of the algorithm */
if (rank < 2 * remain) {
if ((rank & 1) == 0) {
- Request::send(result_buf, count, dtype, rank + 1,
+ Request::send(result_buf, count, dtype, rank + 1,
COLL_TAG_REDUCE_SCATTER,
comm);
/* we don't participate from here on out */
Request::recv(recv_buf, count, dtype, rank - 1,
COLL_TAG_REDUCE_SCATTER,
comm, MPI_STATUS_IGNORE);
-
+
/* integrate their results into our temp results */
if(op!=MPI_OP_NULL) op->apply( recv_buf, result_buf, &count, dtype);
-
+
/* adjust rank to be the bottom "remain" ranks */
tmp_rank = rank / 2;
}
remain" ranks dropped out */
tmp_rank = rank - remain;
}
-
+
/* For ranks not kicked out by the above code, perform the
recursive halving */
if (tmp_rank >= 0) {
int *tmp_disps = NULL, *tmp_rcounts = NULL;
int mask, send_index, recv_index, last_index;
-
+
/* recalculate disps and rcounts to account for the
special "remainder" processes that are no longer doing
anything */
xbt_free(tmp_rcounts);
xbt_free(tmp_disps);
goto cleanup;
- }
+ }
}
if (recv_count > 0 && send_count != 0) {
Request::send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent,
- send_count, dtype, peer,
+ send_count, dtype, peer,
COLL_TAG_REDUCE_SCATTER,
comm);
if (MPI_SUCCESS != err) {
xbt_free(tmp_rcounts);
xbt_free(tmp_disps);
goto cleanup;
- }
+ }
}
if (send_count > 0 && recv_count != 0) {
Request::wait(&request, MPI_STATUS_IGNORE);
/* if we received something on this step, push it into
the results buffer */
if (recv_count > 0) {
- if(op!=MPI_OP_NULL) op->apply(
- recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
+ if(op!=MPI_OP_NULL) op->apply(
+ recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
&recv_count, dtype);
}
/* copy local results from results buffer into real receive buffer */
if (0 != rcounts[rank]) {
err = Datatype::copy(result_buf + disps[rank] * extent,
- rcounts[rank], dtype,
+ rcounts[rank], dtype,
rbuf, rcounts[rank], dtype);
if (MPI_SUCCESS != err) {
xbt_free(tmp_rcounts);
xbt_free(tmp_disps);
goto cleanup;
- }
+ }
}
xbt_free(tmp_rcounts);
COLL_TAG_REDUCE_SCATTER,
comm);
}
- }
+ }
}
cleanup:
* Accepts: Same as MPI_Reduce_scatter()
* Returns: MPI_SUCCESS or error code
*
- * Description: Implements ring algorithm for reduce_scatter:
- * the block sizes defined in rcounts are exchanged and
+ * Description: Implements ring algorithm for reduce_scatter:
+ * the block sizes defined in rcounts are exchanged and
8 updated until they reach proper destination.
* Algorithm requires 2 * max(rcounts) extra buffering
*
- * Limitations: The algorithm DOES NOT preserve order of operations so it
+ * Limitations: The algorithm DOES NOT preserve order of operations so it
* can be used only for commutative operations.
* Example on 5 nodes:
* Initial state
* [04] -> [14] [24] [34] [44]
*
* COMPUTATION PHASE
- * Step 0: rank r sends block (r-1) to rank (r+1) and
+ * Step 0: rank r sends block (r-1) to rank (r+1) and
* receives block (r+1) from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [10] [10+20] -> [30] [40]
* -> [02] [12] [22] [32] [32+42] -->..
* [43+03] -> [13] [23] [33] [43]
* [04] [04+14] -> [24] [34] [44]
- *
+ *
* Step 1:
* # 0 1 2 3 4
* [00] [10] [10+20] [10+20+30] -> [40]
* -> [01] [11] [21] [21+31] [21+31+41] ->
- * [32+42+02] -> [12] [22] [32] [32+42]
+ * [32+42+02] -> [12] [22] [32] [32+42]
* [03] [43+03+13] -> [23] [33] [43]
* [04] [04+14] [04+14+24] -> [34] [44]
*
* # 0 1 2 3 4
* -> [00] [10] [10+20] [10+20+30] [10+20+30+40] ->
* [21+31+41+01]-> [11] [21] [21+31] [21+31+41]
- * [32+42+02] [32+42+02+12]-> [22] [32] [32+42]
+ * [32+42+02] [32+42+02+12]-> [22] [32] [32+42]
* [03] [43+03+13] [43+03+13+23]-> [33] [43]
* [04] [04+14] [04+14+24] [04+14+24+34] -> [44]
*
* # 0 1 2 3 4
* [10+20+30+40+00] [10] [10+20] [10+20+30] [10+20+30+40]
* [21+31+41+01] [21+31+41+01+11] [21] [21+31] [21+31+41]
- * [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42]
+ * [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42]
* [03] [43+03+13] [43+03+13+23] [43+03+13+23+33] [43]
* [04] [04+14] [04+14+24] [04+14+24+34] [04+14+24+34+44]
* DONE :)
*
*/
-int
+int
Coll_reduce_scatter_ompi_ring::reduce_scatter(void *sbuf, void *rbuf, int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
size = comm->size();
rank = comm->rank();
- XBT_DEBUG( "coll:tuned:reduce_scatter_ompi_ring rank %d, size %d",
+ XBT_DEBUG( "coll:tuned:reduce_scatter_ompi_ring rank %d, size %d",
rank, size);
- /* Determine the maximum number of elements per node,
+ /* Determine the maximum number of elements per node,
corresponding block size, and displacements array.
*/
displs = (int*) xbt_malloc(size * sizeof(int));
displs[0] = 0;
total_count = rcounts[0];
max_block_count = rcounts[0];
- for (i = 1; i < size; i++) {
+ for (i = 1; i < size; i++) {
displs[i] = total_count;
total_count += rcounts[i];
if (max_block_count < rcounts[i]) max_block_count = rcounts[i];
}
-
+
/* Special case for size == 1 */
if (1 == size) {
if (MPI_IN_PLACE != sbuf) {
/* Computation loop */
- /*
+ /*
For each of the remote nodes:
- post irecv for block (r-2) from (r-1) with wrap around
- send block (r-1) to (r+1)
for (k = 2; k < size; k++) {
const int prevblock = (rank + size - k) % size;
-
+
inbi = inbi ^ 0x1;
/* Post irecv for the current block */
reqs[inbi]=Request::irecv(inbuf[inbi], max_block_count, dtype, recv_from,
COLL_TAG_REDUCE_SCATTER, comm
);
-
+
/* Wait on previous block to arrive */
Request::wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE);
-
+
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent;
if(op!=MPI_OP_NULL) op->apply( inbuf[inbi ^ 0x1], tmprecv, &(rcounts[prevblock]), dtype);
-
+
/* send previous block to send_to */
Request::send(tmprecv, rcounts[prevblock], dtype, send_to,
COLL_TAG_REDUCE_SCATTER,
rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */
tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent;
if(op!=MPI_OP_NULL) op->apply( inbuf[inbi], tmprecv, &(rcounts[rank]), dtype);
-
+
/* Copy result from tmprecv to rbuf */
ret = Datatype::copy(tmprecv, rcounts[rank], dtype, (char*)rbuf, rcounts[rank], dtype);
if (ret < 0) { line = __LINE__; goto error_hndl; }
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Scatter_intra_function==NULL)
MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Scatter_intra_function==NULL)
MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
int err;
ompi_coll_tree_t* bmtree;
MPI_Status status;
- MPI_Aint sextent, slb, strue_lb, strue_extent;
+ MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
size = comm->size();
"Coll_scatter_ompi_binomial::scatter rank %d", rank);
/* create the binomial tree */
-
+
// COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
- * but for some small number of nodes and/or small data sizes they
- * are just as fast as tuned/tree based segmenting operations
+ * but for some small number of nodes and/or small data sizes they
+ * are just as fast as tuned/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
sendtmpbuf = static_cast<char *>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
}
-
+
if(rank != root) {
// Send buffer to root
Request::send(sendtmpbuf, count, datatype, root, system_tag, comm);
XBT_DEBUG("<%d> wait for %d requests", rank, count);
Request::waitall(count, requests, MPI_STATUS_IGNORE);
for(i = 0; i < count; i++) {
- if(requests[i]!=MPI_REQUEST_NULL)
+ if(requests[i]!=MPI_REQUEST_NULL)
Request::unref(&requests[i]);
}
xbt_free(requests);
4 - Topology aware Reduce + Bcast algorithm
5 - Binomial gather + scatter algorithm
6 - Topology aware binominal gather + scatter algorithm
-7 - Shumilin's ring algorithm
+7 - Shumilin's ring algorithm
8 - Ring algorithm
as Shumilin's ring algorithm is unknown, default to ring'
intel_tuning_table_element intel_allreduce_table[] =
{
- {1,{
+ {1,{
{ 2,9,{
{6,7},
{85,1},
-/*I_MPI_ADJUST_ALLTOALL
+/*I_MPI_ADJUST_ALLTOALL
-MPI_Alltoall
+MPI_Alltoall
-1. Bruck's algorithm
-2. Isend/Irecv + waitall algorithm
-3. Pair wise exchange algorithm
+1. Bruck's algorithm
+2. Isend/Irecv + waitall algorithm
+3. Pair wise exchange algorithm
4. Plum's algorithm
*/
}
}
};
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm) ={
Coll_alltoall_bruck::alltoall,
Coll_alltoall_mvapich2_scatter_dest::alltoall,
Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
};
-/*I_MPI_ADJUST_BARRIER
+/*I_MPI_ADJUST_BARRIER
-MPI_Barrier
+MPI_Barrier
-1. Dissemination algorithm
-2. Recursive doubling algorithm
-3. Topology aware dissemination algorithm
-4. Topology aware recursive doubling algorithm
-5. Binominal gather + scatter algorithm
-6. Topology aware binominal gather + scatter algorithm
+1. Dissemination algorithm
+2. Recursive doubling algorithm
+3. Topology aware dissemination algorithm
+4. Topology aware recursive doubling algorithm
+5. Binominal gather + scatter algorithm
+6. Topology aware binominal gather + scatter algorithm
*/
static int intel_barrier_gather_scatter(MPI_Comm comm){
};
-/*I_MPI_ADJUST_BCAST
+/*I_MPI_ADJUST_BCAST
-MPI_Bcast
+MPI_Bcast
-1. Binomial algorithm
-2. Recursive doubling algorithm
-3. Ring algorithm
-4. Topology aware binomial algorithm
-5. Topology aware recursive doubling algorithm
-6. Topology aware ring algorithm
-7. Shumilin's bcast algorithm
+1. Binomial algorithm
+2. Recursive doubling algorithm
+3. Ring algorithm
+4. Topology aware binomial algorithm
+5. Topology aware recursive doubling algorithm
+6. Topology aware ring algorithm
+7. Shumilin's bcast algorithm
*/
int (*intel_bcast_functions_table[])(void *buff, int count,
};
-/*I_MPI_ADJUST_REDUCE
+/*I_MPI_ADJUST_REDUCE
-MPI_Reduce
+MPI_Reduce
-1. Shumilin's algorithm
-2. Binomial algorithm
-3. Topology aware Shumilin's algorithm
-4. Topology aware binomial algorithm
-5. Rabenseifner's algorithm
+1. Shumilin's algorithm
+2. Binomial algorithm
+3. Topology aware Shumilin's algorithm
+4. Topology aware binomial algorithm
+5. Rabenseifner's algorithm
6. Topology aware Rabenseifner's algorithm
*/
}
};
-/* I_MPI_ADJUST_REDUCE_SCATTER
+/* I_MPI_ADJUST_REDUCE_SCATTER
-MPI_Reduce_scatter
+MPI_Reduce_scatter
-1. Recursive having algorithm
-2. Pair wise exchange algorithm
-3. Recursive doubling algorithm
-4. Reduce + Scatterv algorithm
-5. Topology aware Reduce + Scatterv algorithm
+1. Recursive having algorithm
+2. Pair wise exchange algorithm
+3. Recursive doubling algorithm
+4. Reduce + Scatterv algorithm
+5. Topology aware Reduce + Scatterv algorithm
*/
static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
}
};
-/* I_MPI_ADJUST_ALLGATHER
+/* I_MPI_ADJUST_ALLGATHER
-MPI_Allgather
+MPI_Allgather
-1. Recursive doubling algorithm
-2. Bruck's algorithm
-3. Ring algorithm
-4. Topology aware Gatherv + Bcast algorithm
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
*/
-int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+int (*intel_allgather_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm
) ={
Coll_allgather_rdb::allgather,
}
};
-/* I_MPI_ADJUST_ALLGATHERV
+/* I_MPI_ADJUST_ALLGATHERV
-MPI_Allgatherv
+MPI_Allgatherv
-1. Recursive doubling algorithm
-2. Bruck's algorithm
-3. Ring algorithm
-4. Topology aware Gatherv + Bcast algorithm
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
*/
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int *rcounts,
+ void* rbuf, int *rcounts,
int *rdispls,
- MPI_Datatype rdtype,
+ MPI_Datatype rdtype,
MPI_Comm comm
) ={
Coll_allgatherv_mpich_rdb::allgatherv,
MPI_Gather
-1. Binomial algorithm
-2. Topology aware binomial algorithm
+1. Binomial algorithm
+2. Topology aware binomial algorithm
3. Shumilin's algorithm
*/
-int (*intel_gather_functions_table[])(void *sbuf, int scount,
+int (*intel_gather_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root,
MPI_Comm comm
) ={
};
-/* I_MPI_ADJUST_SCATTER
+/* I_MPI_ADJUST_SCATTER
-MPI_Scatter
+MPI_Scatter
-1. Binomial algorithm
-2. Topology aware binomial algorithm
-3. Shumilin's algorithm
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
*/
-int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+int (*intel_scatter_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root, MPI_Comm comm
) ={
Coll_scatter_ompi_binomial::scatter,
-/* I_MPI_ADJUST_ALLTOALLV
+/* I_MPI_ADJUST_ALLTOALLV
-MPI_Alltoallv
+MPI_Alltoallv
-1. Isend/Irecv + waitall algorithm
-2. Plum's algorithm
+1. Isend/Irecv + waitall algorithm
+2. Plum's algorithm
*/
}
},
{ 2147483647,2,{
- {4,1},//0 again
+ {4,1},//0 again
{2147483647,2}
}
}
};
-//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+
-
#define SIZECOMP_reduce_scatter\
int total_message_size = 0;\
for (i = 0; i < comm_size; i++) { \
total_message_size += rcounts[i];\
}\
size_t block_dsize = total_message_size*dtype->size();\
-
+
#define SIZECOMP_allreduce\
size_t block_dsize =rcount * dtype->size();
-
+
#define SIZECOMP_alltoall\
size_t block_dsize =send_count * send_type->size();
total_message_size += recv_count[i];\
}\
size_t block_dsize = total_message_size*recv_type->size();
-
+
#define SIZECOMP_gather\
int rank = comm->rank();\
size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\
#define SIZECOMP_alltoallv\
size_t block_dsize = 1;
-
+
#define IMPI_COLL_SELECT(cat, ret, args, args2)\
ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
{\
#include "colls_private.h"
/* This is the default implementation of allreduce. The algorithm is:
-
+
Algorithm: MPI_Allreduce
For the heterogeneous case, we call MPI_Reduce followed by MPI_Bcast
For long messages and for builtin ops and if count >= pof2 (where
pof2 is the nearest power-of-two less than or equal to the number
- of processes), we use Rabenseifner's algorithm (see
+ of processes), we use Rabenseifner's algorithm (see
http://www.hlrs.de/mpi/myreduce.html).
This algorithm implements the allreduce in two steps: first a
reduce-scatter, followed by an allgather. A recursive-halving
algorithm (beginning with processes that are distance 1 apart) is
- used for the reduce-scatter, and a recursive doubling
+ used for the reduce-scatter, and a recursive doubling
algorithm is used for the allgather. The non-power-of-two case is
handled by dropping to the nearest lower power-of-two: the first
few even-numbered processes send their data to their right neighbors
power-of-two processes. At the end, the first few even-numbered
processes get the result from their right neighbors.
- For the power-of-two case, the cost for the reduce-scatter is
+ For the power-of-two case, the cost for the reduce-scatter is
lgp.alpha + n.((p-1)/p).beta + n.((p-1)/p).gamma. The cost for the
allgather lgp.alpha + n.((p-1)/p).beta. Therefore, the
total cost is:
Cost = 2.lgp.alpha + 2.n.((p-1)/p).beta + n.((p-1)/p).gamma
- For the non-power-of-two case,
+ For the non-power-of-two case,
Cost = (2.floor(lgp)+2).alpha + (2.((p-1)/p) + 2).n.beta + n.(1+(p-1)/p).gamma
-
- For short messages, for user-defined ops, and for count < pof2
+
+ For short messages, for user-defined ops, and for count < pof2
we use a recursive doubling algorithm (similar to the one in
MPI_Allgather). We use this algorithm in the case of user-defined ops
because in this case derived datatypes are allowed, and the user
could pass basic datatypes on one process and derived on another as
long as the type maps are the same. Breaking up derived datatypes
- to do the reduce-scatter is tricky.
+ to do the reduce-scatter is tricky.
Cost = lgp.alpha + n.lgp.beta + n.lgp.gamma
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Allreduce
*/
if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
//for long messages
- return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
+ return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
}else {
//for short ones and count < pof2
- return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
+ return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
}
/* This is the default implementation of alltoall. The algorithm is:
-
+
Algorithm: MPI_Alltoall
We use four algorithms for alltoall. For short messages and
processes, so that all processes don't try to send/recv to/from the
same process at the same time.
- *** Modification: We post only a small number of isends and irecvs
+ *** Modification: We post only a small number of isends and irecvs
at a time and wait on them as suggested by Tony Ladd. ***
- *** See comments below about an additional modification that
+ *** See comments below about an additional modification that
we may want to consider ***
For long messages and power-of-two number of processes, we use a
This algorithm doesn't work if the number of processes is not a power of
two. For a non-power-of-two number of processes, we use an
algorithm in which, in step i, each process receives from (rank-i)
- and sends to (rank+i).
+ and sends to (rank+i).
Cost = (p-1).alpha + n.beta
where n is the total amount of data a process needs to send to all
other processes.
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Alltoall
*/
-int Coll_alltoall_mpich::alltoall( void *sbuf, int scount,
+int Coll_alltoall_mpich::alltoall( void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm)
{
int communicator_size;
unsigned int short_size=256;
unsigned int medium_size=32768;
//short size and comm_size >=8 -> bruck
-
+
// medium size messages and (short messages for comm_size < 8), we
// use an algorithm that posts all irecvs and isends and then does a
-// waitall.
-
+// waitall.
+
// For long messages and power-of-two number of processes, we use a
// pairwise exchange algorithm
// For a non-power-of-two number of processes, we use an
// algorithm in which, in step i, each process receives from (rank-i)
-// and sends to (rank+i).
+// and sends to (rank+i).
dsize = sdtype->size();
block_dsize = dsize * scount;
if ((block_dsize < short_size) && (communicator_size >= 8)) {
- return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
+ return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (block_dsize < medium_size) {
- return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
}else if (communicator_size%2){
- return Coll_alltoall_ring::alltoall(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_alltoall_ring::alltoall(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
}
)
{
/* For starters, just keep the original algorithm. */
- return Coll_alltoallv_bruck::alltoallv(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_bruck::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
int Coll_barrier_mpich::barrier(MPI_Comm comm)
-{
+{
return Coll_barrier_ompi_bruck::barrier(comm);
}
/* This is the default implementation of broadcast. The algorithm is:
-
+
Algorithm: MPI_Bcast
- For short messages, we use a binomial tree algorithm.
+ For short messages, we use a binomial tree algorithm.
Cost = lgp.alpha + n.lgp.beta
- For long messages, we do a scatter followed by an allgather.
+ For long messages, we do a scatter followed by an allgather.
We first scatter the buffer using a binomial tree algorithm. This costs
lgp.alpha + n.((p-1)/p).beta
If the datatype is contiguous and the communicator is homogeneous,
cases, we first pack the data into a temporary buffer by using
MPI_Pack, scatter it as bytes, and unpack it after the allgather.
- For the allgather, we use a recursive doubling algorithm for
+ For the allgather, we use a recursive doubling algorithm for
medium-size messages and power-of-two number of processes. This
takes lgp steps. In each step pairs of processes exchange all the
data they have (we take care of non-power-of-two situations). This
versus n.lgp.beta. Therefore, for long messages and when lgp > 2,
this algorithm will perform better.
- For long messages and for medium-size messages and non-power-of-two
- processes, we use a ring algorithm for the allgather, which
+ For long messages and for medium-size messages and non-power-of-two
+ processes, we use a ring algorithm for the allgather, which
takes p-1 steps, because it performs better than recursive doubling.
Total Cost = (lgp+p-1).alpha + 2.n.((p-1)/p).beta
- Possible improvements:
+ Possible improvements:
For clusters of SMPs, we may want to do something differently to
take advantage of shared memory on each node.
MPI_Comm comm
)
{
- /* Decision function based on MX results for
+ /* Decision function based on MX results for
messages up to 36MB and communicator sizes up to 64 nodes */
const size_t small_message_size = 12288;
const size_t intermediate_message_size = 524288;
dsize = datatype->size();
message_size = dsize * (unsigned long)count; /* needed for decision */
- /* Handle messages of small and intermediate size, and
+ /* Handle messages of small and intermediate size, and
single-element broadcasts */
if ((message_size < small_message_size) || (communicator_size <= 8)) {
/* Binomial without segmentation */
- return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
+ return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
root, comm);
} else if (message_size < intermediate_message_size && !(communicator_size%2)) {
// SplittedBinary with 1KB segments
- return Coll_bcast_scatter_rdb_allgather::bcast(buff, count, datatype,
+ return Coll_bcast_scatter_rdb_allgather::bcast(buff, count, datatype,
root, comm);
}
- //Handle large message sizes
- return Coll_bcast_scatter_LR_allgather::bcast (buff, count, datatype,
+ //Handle large message sizes
+ return Coll_bcast_scatter_LR_allgather::bcast (buff, count, datatype,
root, comm);
-
+
}
/* This is the default implementation of reduce. The algorithm is:
-
+
Algorithm: MPI_Reduce
For long messages and for builtin ops and if count >= pof2 (where
pof2 is the nearest power-of-two less than or equal to the number
- of processes), we use Rabenseifner's algorithm (see
+ of processes), we use Rabenseifner's algorithm (see
http://www.hlrs.de/organization/par/services/models/mpi/myreduce.html ).
This algorithm implements the reduce in two steps: first a
reduce-scatter, followed by a gather to the root. A
the root and exits; the root now acts as rank 0 in the binomial tree
algorithm for gather.
- For the power-of-two case, the cost for the reduce-scatter is
+ For the power-of-two case, the cost for the reduce-scatter is
lgp.alpha + n.((p-1)/p).beta + n.((p-1)/p).gamma. The cost for the
gather to root is lgp.alpha + n.((p-1)/p).beta. Therefore, the
total cost is:
For the non-power-of-two case, assuming the root is not one of the
odd-numbered processes that get excluded in the reduce-scatter,
- Cost = (2.floor(lgp)+1).alpha + (2.((p-1)/p) + 1).n.beta +
+ Cost = (2.floor(lgp)+1).alpha + (2.((p-1)/p) + 1).n.beta +
n.(1+(p-1)/p).gamma
For short messages, user-defined ops, and count < pof2, we use a
- binomial tree algorithm for both short and long messages.
+ binomial tree algorithm for both short and long messages.
Cost = lgp.alpha + n.lgp.beta + n.lgp.gamma
should be able to use the reduce-scatter/gather approach as long as
count >= pof2. [goodell@ 2009-01-21]
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Reduce
*/
pof2 >>= 1;
if ((count < pof2) || (message_size < 2048) || (op != MPI_OP_NULL && not op->is_commutative())) {
- return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+ return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
}
return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
if(sbuf==rbuf)sbuf=MPI_IN_PLACE; //restore MPI_IN_PLACE as these algorithms handle it
XBT_DEBUG("Coll_reduce_scatter_mpich::reduce");
-
+
comm_size = comm->size();
- // We need data size for decision function
+ // We need data size for decision function
total_message_size = 0;
- for (i = 0; i < comm_size; i++) {
+ for (i = 0; i < comm_size; i++) {
total_message_size += rcounts[i];
}
- if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) {
- return Coll_reduce_scatter_mpich_pair::reduce_scatter (sbuf, rbuf, rcounts,
- dtype, op,
+ if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) {
+ return Coll_reduce_scatter_mpich_pair::reduce_scatter (sbuf, rbuf, rcounts,
+ dtype, op,
comm);
} else if ((op != MPI_OP_NULL && not op->is_commutative())) {
int is_block_regular = 1;
}
return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
- }else{
+ }else{
return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
}
}
/* This is the default implementation of allgather. The algorithm is:
-
+
Algorithm: MPI_Allgather
For short messages and non-power-of-two no. of processes, we use
neighbor) performs twice as fast as recursive doubling for long
messages (on Myrinet and IBM SP).
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Allgather
*/
-int Coll_allgather_mpich::allgather(void *sbuf, int scount,
+int Coll_allgather_mpich::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm
)
{
/* Determine complete data size */
dsize=sdtype->size();
- total_dsize = dsize * scount * communicator_size;
-
- for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
+ total_dsize = dsize * scount * communicator_size;
- /* Decision as in MPICH-2
- presented in Thakur et.al. "Optimization of Collective Communication
- Operations in MPICH", International Journal of High Performance Computing
+ for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
+
+ /* Decision as in MPICH-2
+ presented in Thakur et.al. "Optimization of Collective Communication
+ Operations in MPICH", International Journal of High Performance Computing
Applications, Vol. 19, No. 1, 49-66 (2005)
- - for power-of-two processes and small and medium size messages
+ - for power-of-two processes and small and medium size messages
(up to 512KB) use recursive doubling
- for non-power-of-two processes and small messages (80KB) use bruck,
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
- } else if (total_dsize <= 81920) {
- return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
+ } else if (total_dsize <= 81920) {
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
- }
- return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
+ }
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
/* This is the default implementation of allgatherv. The algorithm is:
-
+
Algorithm: MPI_Allgatherv
For short messages and non-power-of-two no. of processes, we use
Cost = (p-1).alpha + n.((p-1)/p).beta
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Allgatherv
*/
-int Coll_allgatherv_mpich::allgatherv(void *sbuf, int scount,
+int Coll_allgatherv_mpich::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int *rcounts,
+ void* rbuf, int *rcounts,
int *rdispls,
- MPI_Datatype rdtype,
+ MPI_Datatype rdtype,
MPI_Comm comm
)
{
total_dsize += rcounts[i];
if (total_dsize == 0)
return MPI_SUCCESS;
-
- for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
+
+ for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return Coll_allgatherv_mpich_rdb::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+ return Coll_allgatherv_mpich_rdb::allgatherv(sbuf, scount, sdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm);
- } else if (total_dsize <= 81920) {
- return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype,
+ } else if (total_dsize <= 81920) {
+ return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
- }
+ }
return Coll_allgatherv_mpich_ring::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
}
/* This is the default implementation of gather. The algorithm is:
-
+
Algorithm: MPI_Gather
We use a binomial tree algorithm for both short and long
Cost = lgp.alpha + n.((p-1)/p).beta
where n is the total size of the data gathered at the root.
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Gather
*/
-int Coll_gather_mpich::gather(void *sbuf, int scount,
+int Coll_gather_mpich::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root,
MPI_Comm comm
)
{
- return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
root, comm);
}
/* This is the default implementation of scatter. The algorithm is:
-
+
Algorithm: MPI_Scatter
We use a binomial tree algorithm for both short and
long messages. At nodes other than leaf nodes we need to allocate
a temporary buffer to store the incoming message. If the root is
- not rank 0, we reorder the sendbuf in order of relative ranks by
+ not rank 0, we reorder the sendbuf in order of relative ranks by
copying it into a temporary buffer, so that all the sends from the
root are contiguous and in the right order. In the heterogeneous
case, we first pack the buffer by using MPI_Pack and then do the
- scatter.
+ scatter.
Cost = lgp.alpha + n.((p-1)/p).beta
where n is the total size of the data to be scattered from the root.
- Possible improvements:
+ Possible improvements:
End Algorithm: MPI_Scatter
*/
-int Coll_scatter_mpich::scatter(void *sbuf, int scount,
+int Coll_scatter_mpich::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root, MPI_Comm comm
)
{
sdtype=rdtype;
}
int ret= Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ rbuf, rcount, rdtype,
root, comm);
if(comm->rank()!=root){
xbt_free(sbuf);
namespace smpi{
-int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount,
+int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount,
MPI_Datatype sendtype,
void* recvbuf, int recvcount,
MPI_Datatype recvtype,
if(mv2_allgather_table_ppn_conf==NULL)
init_mv2_allgather_tables_stampede();
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
if (partial_sub_ok != 1) {
conf_index = 0;
}
-
+
/* Search for the corresponding system size inside the tuning table */
while ((range < (mv2_size_allgather_tuning_table[conf_index] - 1)) &&
(comm_size >
-1)) {
range_intra_threshold++;
}
-
+
if (comm->is_blocked() ) {
- // Set intra-node function pt for gather_two_level
- MV2_Gather_intra_node_function =
+ // Set intra-node function pt for gather_two_level
+ MV2_Gather_intra_node_function =
mv2_gather_thresholds_table[range].intra_node[range_intra_threshold].
MV2_pt_Gather_function;
- //Set inter-leader pt
+ //Set inter-leader pt
MV2_Gather_inter_leader_function =
mv2_gather_thresholds_table[range].inter_leader[range_threshold].
MV2_pt_Gather_function;
- // We call Gather function
+ // We call Gather function
mpi_errno =
MV2_Gather_inter_leader_function(sendbuf, sendcnt, sendtype, recvbuf, recvcnt,
recvtype, root, comm);
int Coll_barrier_mvapich2::barrier(MPI_Comm comm)
-{
+{
return Coll_barrier_mvapich2_pair::barrier(comm);
}
int mpi_errno = MPI_SUCCESS;
int comm_size/*, rank*/;
int two_level_bcast = 1;
- long nbytes = 0;
+ long nbytes = 0;
int range = 0;
int range_threshold = 0;
int range_threshold_intra = 0;
if (mv2_bcast_thresholds_table[range].inter_leader[range_threshold].
zcpy_pipelined_knomial_factor != -1) {
- zcpy_knomial_factor =
+ zcpy_knomial_factor =
mv2_bcast_thresholds_table[range].inter_leader[range_threshold].
zcpy_pipelined_knomial_factor;
}
/* Set value of pipeline segment size */
bcast_segment_size = mv2_bcast_thresholds_table[range].bcast_segment_size;
-
+
/* Set value of inter node knomial factor */
mv2_inter_node_knomial_factor = mv2_bcast_thresholds_table[range].inter_node_knomial_factor;
/* Check if we will use a two level algorithm or not */
two_level_bcast =
#if defined(_MCST_SUPPORT_)
- mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold]
+ mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold]
|| comm->ch.is_mcast_ok;
#else
mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold];
(&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) {
if (not is_contig || not is_homogeneous) {
mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
- } else {
+ } else {
mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(buffer, count, datatype,
root, comm);
- }
- } else
+ }
+ } else
#endif /* defined(CHANNEL_MRAIL_GEN2) */
- {
+ {
shmem_comm = comm->get_intra_comm();
if (not is_contig || not is_homogeneous) {
mpi_errno = MPIR_Bcast_tune_inter_node_helper_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
- mpi_errno = MPIR_Reduce_two_level_helper_MV2(sendbuf, recvbuf, count,
+ mpi_errno = MPIR_Reduce_two_level_helper_MV2(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
} else if(MV2_Reduce_function == &MPIR_Reduce_inter_knomial_wrapper_MV2 ){
if(is_commutative ==1)
{
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
} else if(MV2_Reduce_function == &MPIR_Reduce_redscat_gather_MV2){
if (/*(HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) &&*/ (count >= pof2))
{
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
datatype, op, root, comm);
}
} else {
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
}
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
comm_size = comm->size();
rank = comm->rank();
recvtype_size=recvtype->size();
nbytes = recvcnt * recvtype_size;
}
-
- // check if safe to use partial subscription mode
+
+ // check if safe to use partial subscription mode
if (comm->is_uniform()) {
shmem_comm = comm->get_intra_comm();
local_size = shmem_comm->size();
i = 0;
if (mv2_scatter_table_ppn_conf[0] == -1) {
- // Indicating user defined tuning
+ // Indicating user defined tuning
conf_index = 0;
}else{
do {
} while(i < mv2_scatter_num_ppn_conf);
}
}
-
+
if (partial_sub_ok != 1) {
conf_index = 0;
}
/* Fallback! */
MV2_Scatter_function = &MPIR_Scatter_MV2_Binomial;
}
- }
+ }
}
if( (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Direct) ||
#define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
#define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
#define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
-#define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
+#define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
static void init_mv2_alltoall_tables_stampede(){
} mv2_gather_tuning_table;
int mv2_size_gather_tuning_table=7;
-mv2_gather_tuning_table * mv2_gather_thresholds_table=NULL;
+mv2_gather_tuning_table * mv2_gather_thresholds_table=NULL;
typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
int sendcnt,
int count,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
-{
+{
return 0;
}
* mv2_size_scatter_tuning_table[i]));
}
xbt_free(table_ptrs);
-
+
}
/**
* Decision function based on MX results from the Grig cluster at UTK.
- *
- * Currently, linear, recursive doubling, and nonoverlapping algorithms
+ *
+ * Currently, linear, recursive doubling, and nonoverlapping algorithms
* can handle both commutative and non-commutative operations.
* Ring algorithm does not support non-commutative operations.
*/
block_dsize = dsize * count;
if (block_dsize < intermediate_message) {
- return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
+ return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
- }
+ }
if( ((op==MPI_OP_NULL) || op->is_commutative()) && (count > comm_size) ) {
const size_t segment_size = 1 << 20; /* 1 MB */
op, comm);
} else {
return (Coll_allreduce_ompi_ring_segmented::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm
+ count, dtype,
+ op, comm
/*segment_size*/));
}
}
- return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count,
+ return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count,
dtype, op, comm));
}
-int Coll_alltoall_ompi::alltoall( void *sbuf, int scount,
+int Coll_alltoall_ompi::alltoall( void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm)
{
int communicator_size;
size_t dsize, block_dsize;
communicator_size = comm->size();
- /* Decision function based on measurement on Grig cluster at
+ /* Decision function based on measurement on Grig cluster at
the University of Tennessee (2GB MX) up to 64 nodes.
Has better performance for messages of intermediate sizes than the old one */
/* determine block size */
block_dsize = dsize * scount;
if ((block_dsize < 200) && (communicator_size > 12)) {
- return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
+ return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (block_dsize < 3000) {
- return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
}
- return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype,
+ return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
)
{
/* For starters, just keep the original algorithm. */
- return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
MPI_Comm comm
)
{
- /* Decision function based on MX results for
+ /* Decision function based on MX results for
messages up to 36MB and communicator sizes up to 64 nodes */
const size_t small_message_size = 2048;
const size_t intermediate_message_size = 370728;
const double a_p16 = 3.2118e-6; /* [1 / byte] */
- const double b_p16 = 8.7936;
+ const double b_p16 = 8.7936;
const double a_p64 = 2.3679e-6; /* [1 / byte] */
- const double b_p64 = 1.1787;
+ const double b_p64 = 1.1787;
const double a_p128 = 1.6134e-6; /* [1 / byte] */
const double b_p128 = 2.1102;
dsize = datatype->size();
message_size = dsize * (unsigned long)count; /* needed for decision */
- /* Handle messages of small and intermediate size, and
+ /* Handle messages of small and intermediate size, and
single-element broadcasts */
if ((message_size < small_message_size) || (count <= 1)) {
/* Binomial without segmentation */
- return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
+ return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
root, comm);
} else if (message_size < intermediate_message_size) {
// SplittedBinary with 1KB segments
- return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
+ return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
root, comm);
}
- //Handle large message sizes
+ //Handle large message sizes
else if (communicator_size < (a_p128 * message_size + b_p128)) {
- //Pipeline with 128KB segments
+ //Pipeline with 128KB segments
//segsize = 1024 << 7;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
-
+
} else if (communicator_size < 13) {
- // Split Binary with 8KB segments
- return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
+ // Split Binary with 8KB segments
+ return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
root, comm);
-
+
} else if (communicator_size < (a_p64 * message_size + b_p64)) {
- // Pipeline with 64KB segments
+ // Pipeline with 64KB segments
//segsize = 1024 << 6;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
-
+
} else if (communicator_size < (a_p16 * message_size + b_p16)) {
- //Pipeline with 16KB segments
+ //Pipeline with 16KB segments
//segsize = 1024 << 4;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
-
+
}
/* Pipeline with 8KB segments */
//segsize = 1024 << 3;
- return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype,
+ return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype,
root, comm
/*segsize*/);
#if 0
message_size = dsize * count; /* needed for decision */
/**
- * If the operation is non commutative we currently have choice of linear
+ * If the operation is non commutative we currently have choice of linear
* or in-order binary tree algorithm.
*/
if ((op != MPI_OP_NULL) && not op->is_commutative()) {
return Coll_reduce_ompi_basic_linear::reduce(sendbuf, recvbuf, count, datatype, op, root, comm /*, module*/);
}
return Coll_reduce_ompi_in_order_binary::reduce(sendbuf, recvbuf, count, datatype, op, root, comm /*, module,
- 0, max_requests*/);
+ 0, max_requests*/);
}
if ((communicator_size < 8) && (message_size < 512)){
/* Linear_0K */
- return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm);
+ return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm);
} else if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a1 * message_size + b1)) {
- // Binomial_1K
+ // Binomial_1K
//segsize = 1024;
return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a2 * message_size + b2)) {
- // Pipeline_1K
+ // Pipeline_1K
//segsize = 1024;
- return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a3 * message_size + b3)) {
- // Binary_32K
+ // Binary_32K
//segsize = 32*1024;
return Coll_reduce_ompi_binary::reduce( sendbuf, recvbuf, count, datatype, op, root,
comm/*, module, segsize, max_requests*/);
}
// if (communicator_size > (a4 * message_size + b4)) {
- // Pipeline_32K
+ // Pipeline_32K
// segsize = 32*1024;
// } else {
- // Pipeline_64K
+ // Pipeline_64K
// segsize = 64*1024;
// }
- return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
#if 0
fanout = communicator_size - 1;
/* when linear implemented or taken from basic put here, right now using chain as a linear system */
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
- return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module);
+ return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module);
/* return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
}
if (message_size < 524288) {
int zerocounts = 0;
XBT_DEBUG("Coll_reduce_scatter_ompi::reduce_scatter");
-
+
comm_size = comm->size();
- // We need data size for decision function
+ // We need data size for decision function
dsize=dtype->size();
total_message_size = 0;
- for (i = 0; i < comm_size; i++) {
+ for (i = 0; i < comm_size; i++) {
total_message_size += rcounts[i];
if (0 == rcounts[i]) {
zerocounts = 1;
Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
return MPI_SUCCESS;
}
-
+
total_message_size *= dsize;
- // compute the nearest power of 2
+ // compute the nearest power of 2
for (pow2 = 1; pow2 < comm_size; pow2 <<= 1);
if ((total_message_size <= small_message_size) ||
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
(comm_size >= a * total_message_size + b)) {
- return
+ return
Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,
dtype, op,
comm);
- }
+ }
return Coll_reduce_scatter_ompi_ring::reduce_scatter(sbuf, rbuf, rcounts,
dtype, op,
comm);
}
-int Coll_allgather_ompi::allgather(void *sbuf, int scount,
+int Coll_allgather_ompi::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm
)
{
/* Special case for 2 processes */
if (communicator_size == 2) {
- return Coll_allgather_pair::allgather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_allgather_pair::allgather (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm/*, module*/);
}
/* Determine complete data size */
dsize=sdtype->size();
- total_dsize = dsize * scount * communicator_size;
-
- for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
-
- /* Decision based on MX 2Gb results from Grig cluster at
- The University of Tennesse, Knoxville
- - if total message size is less than 50KB use either bruck or
- recursive doubling for non-power of two and power of two nodes,
+ total_dsize = dsize * scount * communicator_size;
+
+ for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
+
+ /* Decision based on MX 2Gb results from Grig cluster at
+ The University of Tennesse, Knoxville
+ - if total message size is less than 50KB use either bruck or
+ recursive doubling for non-power of two and power of two nodes,
respectively.
- - else use ring and neighbor exchange algorithms for odd and even
+ - else use ring and neighbor exchange algorithms for odd and even
number of nodes, respectively.
*/
if (total_dsize < 50000) {
if (pow2_size == communicator_size) {
- return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else {
- return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
}
} else {
if (communicator_size % 2) {
- return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
} else {
return Coll_allgather_ompi_neighborexchange::allgather(sbuf, scount, sdtype,
comm);
}
}
-
+
#if defined(USE_MPICH2_DECISION)
- /* Decision as in MPICH-2
- presented in Thakur et.al. "Optimization of Collective Communication
- Operations in MPICH", International Journal of High Performance Computing
+ /* Decision as in MPICH-2
+ presented in Thakur et.al. "Optimization of Collective Communication
+ Operations in MPICH", International Journal of High Performance Computing
Applications, Vol. 19, No. 1, 49-66 (2005)
- - for power-of-two processes and small and medium size messages
+ - for power-of-two processes and small and medium size messages
(up to 512KB) use recursive doubling
- for non-power-of-two processes and small messages (80KB) use bruck,
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
comm);
- } else if (total_dsize <= 81920) {
- return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
+ } else if (total_dsize <= 81920) {
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
- }
- return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
+ }
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
#endif /* defined(USE_MPICH2_DECISION) */
}
-int Coll_allgatherv_ompi::allgatherv(void *sbuf, int scount,
+int Coll_allgatherv_ompi::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int *rcounts,
+ void* rbuf, int *rcounts,
int *rdispls,
- MPI_Datatype rdtype,
+ MPI_Datatype rdtype,
MPI_Comm comm
)
{
int i;
int communicator_size;
size_t dsize, total_dsize;
-
+
communicator_size = comm->size();
-
+
/* Special case for 2 processes */
if (communicator_size == 2) {
return Coll_allgatherv_pair::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm);
}
-
+
/* Determine complete data size */
dsize=sdtype->size();
total_dsize = 0;
for (i = 0; i < communicator_size; i++) {
total_dsize += dsize * rcounts[i];
}
-
+
/* Decision based on allgather decision. */
if (total_dsize < 50000) {
-/* return Coll_allgatherv_intra_bruck::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+/* return Coll_allgatherv_intra_bruck::allgatherv(sbuf, scount, sdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm, module);*/
- return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+ return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm);
} else {
if (communicator_size % 2) {
- return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+ return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm);
} else {
return Coll_allgatherv_ompi_neighborexchange::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
+ rbuf, rcounts, rdispls, rdtype,
comm);
}
}
}
-int Coll_gather_ompi::gather(void *sbuf, int scount,
+int Coll_gather_ompi::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root,
MPI_Comm comm
)
communicator_size = comm->size();
rank = comm->rank();
- // Determine block size
+ // Determine block size
if (rank == root) {
dsize = rdtype->size();
block_size = dsize * rcount;
/* root, comm);*/
/* } else*/ if (block_size > intermediate_block_size) {
- return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
root, comm);
} else if ((communicator_size > large_communicator_size) ||
((communicator_size > small_communicator_size) &&
(block_size < small_block_size))) {
- return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
root, comm);
}
- // Otherwise, use basic linear
- return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ // Otherwise, use basic linear
+ return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
root, comm);
}
-int Coll_scatter_ompi::scatter(void *sbuf, int scount,
+int Coll_scatter_ompi::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root, MPI_Comm comm
)
{
communicator_size = comm->size();
rank = comm->rank();
- // Determine block size
+ // Determine block size
if (root == rank) {
dsize=sdtype->size();
block_size = dsize * scount;
} else {
dsize=rdtype->size();
block_size = dsize * rcount;
- }
+ }
if ((communicator_size > small_comm_size) &&
(block_size < small_block_size)) {
}
return ret;
}
- return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
+ return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
root, comm);
}
while (current != nullptr) {
if (strcmp (state, current) == 0 //exact match
|| strstr(target, current) != 0 ){//as substring
- ret = smpi_colors[i+1];
- break;
+ ret = smpi_colors[i+1];
+ break;
}
i+=2;
current = smpi_colors[i];
XBT_PRIVATE MPI_Comm* smpi_deployment_comm_world(const char* instance_id);
XBT_PRIVATE msg_bar_t smpi_deployment_finalization_barrier(const char* instance_id);
XBT_PRIVATE void smpi_deployment_cleanup_instances();
-
+
XBT_PRIVATE void smpi_comm_copy_buffer_callback(smx_activity_t comm, void *buff, size_t buff_size);
XBT_PRIVATE void smpi_comm_null_copy_buffer_callback(smx_activity_t comm, void *buff, size_t buff_size);
#define TOPAGE(addr) (void *)(((unsigned long)(addr) / xbt_pagesize) * xbt_pagesize)
#if HAVE_PAPI
-typedef
+typedef
std::vector<std::pair</* counter name */std::string, /* counter value */long long>> papi_counter_t;
XBT_PRIVATE papi_counter_t& smpi_process_papi_counters();
XBT_PRIVATE int smpi_process_papi_event_set();
smpi_trace_set_call_location(file, *line);
}
- /**
+ /**
* Required for Fortran if -fsecond-underscore is activated
*/
void smpi_trace_set_call_location__(const char* file, int* line) {
Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype);
recvbuf_is_empty = 0;
} else
- if(op!=MPI_OP_NULL)
+ if(op!=MPI_OP_NULL)
op->apply( tmpbufs[other], recvbuf, &count, datatype);
}
}
}
MPI_Comm Comm::get_intra_comm(){
- if (this == MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD)
+ if (this == MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD)
return smpi_process()->comm_intra();
else return intra_comm_;
}
}
if(i != 0 && group_out != MPI_COMM_WORLD->group() && group_out != MPI_GROUP_EMPTY)
Group::unref(group_out);
-
+
Request::waitall(reqs, requests, MPI_STATUS_IGNORE);
xbt_free(requests);
}
smpi_process()->comm_world()->init_smp();
int comm_size = this->size();
-
- // If we are in replay - perform an ugly hack
+
+ // If we are in replay - perform an ugly hack
// tell SimGrid we are not in replay for a while, because we need the buffers to be copied for the following calls
bool replaying = false; //cache data to set it back again after
if(smpi_process()->replaying()){
is_blocked_=global_blocked;
}
xbt_free(leader_list);
-
+
if(replaying)
- smpi_process()->set_replaying(true);
+ smpi_process()->set_replaying(true);
}
MPI_Comm Comm::f2c(int id) {
private:
MPI_Group group_;
- MPIR_Topo_type topoType_;
+ MPIR_Topo_type topoType_;
MPI_Topology topo_; // to be replaced by an union
int refcount_;
MPI_Comm leaders_comm_;//inter-node communicator
void* value_out;
for(auto it = datatype->attributes()->begin(); it != datatype->attributes()->end(); it++){
smpi_key_elem elem = keyvals_.at((*it).first);
-
+
if (elem != nullptr && elem->copy_fn.type_copy_fn != MPI_NULL_COPY_FN) {
*ret = elem->copy_fn.type_copy_fn(datatype, (*it).first, nullptr, (*it).second, &value_out, &flag);
if (*ret != MPI_SUCCESS) {
int Datatype::create_vector(int count, int block_length, int stride, MPI_Datatype old_type, MPI_Datatype* new_type)
{
int retval;
- if (block_length<0)
+ if (block_length<0)
return MPI_ERR_ARG;
MPI_Aint lb = 0;
MPI_Aint ub = 0;
int Datatype::create_hvector(int count, int block_length, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype* new_type)
{
int retval;
- if (block_length<0)
+ if (block_length<0)
return MPI_ERR_ARG;
MPI_Aint lb = 0;
MPI_Aint ub = 0;
return MPI_ERR_ARG;
size += block_lengths[i];
- if(indices[i]+old_type->lb()<lb)
+ if(indices[i]+old_type->lb()<lb)
lb = indices[i]+old_type->lb();
- if(indices[i]+block_lengths[i]*old_type->ub()>ub)
+ if(indices[i]+block_lengths[i]*old_type->ub()>ub)
ub = indices[i]+block_lengths[i]*old_type->ub();
if ( (i< count -1) && (indices[i]+block_lengths[i]*(static_cast<int>(old_type->size())) != indices[i+1]) )
contiguous=false;
}
if (not contiguous) {
- *new_type = new Type_Struct(size, lb,ub, DT_FLAG_DERIVED|DT_FLAG_DATA,
+ *new_type = new Type_Struct(size, lb,ub, DT_FLAG_DERIVED|DT_FLAG_DATA,
count, block_lengths, indices, old_types);
}else{
Datatype::create_contiguous(size, MPI_CHAR, lb, new_type);
void set_name(char* name);
static int copy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype);
- virtual void serialize( void* noncontiguous, void *contiguous,
+ virtual void serialize( void* noncontiguous, void *contiguous,
int count);
- virtual void unserialize( void* contiguous, void *noncontiguous,
+ virtual void unserialize( void* contiguous, void *noncontiguous,
int count, MPI_Op op);
static int keyval_create(MPI_Type_copy_attr_function* copy_fn, MPI_Type_delete_attr_function* delete_fn, int* keyval, void* extra_state);
static int keyval_free(int* keyval);
}
-void Type_Contiguous::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Contiguous::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+lb();
memcpy(contiguous_buf_char, noncontiguous_buf_char, count * block_count_ * old_type_->size());
}
-void Type_Contiguous::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Contiguous::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+lb();
}
-void Type_Vector::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Vector::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf);
}
}
-void Type_Vector::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Vector::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf);
Datatype::unref(old_type_);
}
-void Type_Hvector::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Hvector::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf);
}
-void Type_Hvector::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Hvector::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf);
for (int i = 0; i < block_count_ * count; i++) {
if (not(old_type_->flags() & DT_FLAG_DERIVED)) {
- if(op!=MPI_OP_NULL)
+ if(op!=MPI_OP_NULL)
op->apply( contiguous_buf_char, noncontiguous_buf_char, &block_length_, old_type_);
}else
old_type_->unserialize( contiguous_buf_char, noncontiguous_buf_char, block_length_, op);
}
-void Type_Indexed::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Indexed::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+block_indices_[0] * old_type_->size();
}
-void Type_Indexed::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Indexed::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char =
for (int j = 0; j < count; j++) {
for (int i = 0; i < block_count_; i++) {
if (not(old_type_->flags() & DT_FLAG_DERIVED)) {
- if(op!=MPI_OP_NULL)
+ if(op!=MPI_OP_NULL)
op->apply( contiguous_buf_char, noncontiguous_buf_char, &block_lengths_[i],
old_type_);
}else
}
}
-void Type_Hindexed::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Hindexed::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+ block_indices_[0];
}
}
-void Type_Hindexed::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Hindexed::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+ block_indices_[0];
for (int j = 0; j < count; j++) {
for (int i = 0; i < block_count_; i++) {
if (not(old_type_->flags() & DT_FLAG_DERIVED)) {
- if(op!=MPI_OP_NULL)
+ if(op!=MPI_OP_NULL)
op->apply( contiguous_buf_char, noncontiguous_buf_char, &block_lengths_[i],
old_type_);
}else
}
-void Type_Struct::serialize( void* noncontiguous_buf, void *contiguous_buf,
+void Type_Struct::serialize( void* noncontiguous_buf, void *contiguous_buf,
int count){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+ block_indices_[0];
}
}
-void Type_Struct::unserialize( void* contiguous_buf, void *noncontiguous_buf,
+void Type_Struct::unserialize( void* contiguous_buf, void *noncontiguous_buf,
int count, MPI_Op op){
char* contiguous_buf_char = static_cast<char*>(contiguous_buf);
char* noncontiguous_buf_char = static_cast<char*>(noncontiguous_buf)+ block_indices_[0];
for (int j = 0; j < count; j++) {
for (int i = 0; i < block_count_; i++) {
if (not(old_types_[i]->flags() & DT_FLAG_DERIVED)) {
- if(op!=MPI_OP_NULL)
+ if(op!=MPI_OP_NULL)
op->apply( contiguous_buf_char, noncontiguous_buf_char, &block_lengths_[i], old_types_[i]);
}else
old_types_[i]->unserialize( contiguous_buf_char, noncontiguous_buf_char,block_lengths_[i], op);
public:
Type_Contiguous(int size, MPI_Aint lb, MPI_Aint ub, int flags, int block_count, MPI_Datatype old_type);
~Type_Contiguous();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
public:
Type_Vector(int size,MPI_Aint lb, MPI_Aint ub, int flags, int count, int blocklen, int stride, MPI_Datatype old_type);
~Type_Vector();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
public:
Type_Hvector(int size,MPI_Aint lb, MPI_Aint ub, int flags, int block_count, int block_length, MPI_Aint block_stride, MPI_Datatype old_type);
~Type_Hvector();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
public:
Type_Indexed(int size,MPI_Aint lb, MPI_Aint ub, int flags, int block_count, int* block_lengths, int* block_indices, MPI_Datatype old_type);
~Type_Indexed();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
public:
Type_Hindexed(int size,MPI_Aint lb, MPI_Aint ub, int flags, int block_count, int* block_lengths, MPI_Aint* block_indices, MPI_Datatype old_type);
~Type_Hindexed();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
public:
Type_Struct(int size,MPI_Aint lb, MPI_Aint ub, int flags, int block_count, int* block_lengths, MPI_Aint* block_indices, MPI_Datatype* old_types);
~Type_Struct();
- void serialize( void* noncontiguous, void *contiguous,
+ void serialize( void* noncontiguous, void *contiguous,
int count);
- void unserialize( void* contiguous_vector, void *noncontiguous_vector,
+ void unserialize( void* contiguous_vector, void *noncontiguous_vector,
int count, MPI_Op op);
};
class F2C {
private:
- // We use a single lookup table for every type.
+ // We use a single lookup table for every type.
// Beware of collisions if id in mpif.h is not unique
static xbt_dict_t f2c_lookup_;
static int f2c_id_;
void mpi_wait_(int* request, MPI_Status* status, int* ierr) {
MPI_Request req = simgrid::smpi::Request::f2c(*request);
-
+
*ierr = MPI_Wait(&req, FORT_STATUS_IGNORE(status));
if(req==MPI_REQUEST_NULL){
simgrid::smpi::Request::free_f(*request);
void mpi_win_get_name_ (int* win, char * name, int* len, int* ierr){
*ierr = MPI_Win_get_name(simgrid::smpi::Win::f2c(*win),name,len);
- if(*len>0)
+ if(*len>0)
name[*len]=' ';//blank padding, not \0
}
}
char* tkey = xbt_new(char,keylen+1);
strncpy(tkey, key, keylen);
- tkey[keylen]='\0';
+ tkey[keylen]='\0';
while(value[valuelen-1]==' ')
valuelen--;
}
char* tvalue = xbt_new(char,valuelen+1);
strncpy(tvalue, value, valuelen);
- tvalue[valuelen]='\0';
+ tvalue[valuelen]='\0';
*ierr = MPI_Info_set( simgrid::smpi::Info::f2c(*info), tkey, tvalue);
xbt_free(tkey);
while(*key==' '){//handle leading blanks
keylen--;
key++;
- }
+ }
char* tkey = xbt_new(char,keylen+1);
strncpy(tkey, key, keylen);
tkey[keylen]='\0';
if(replace)
value[i]=' ';
}
- }
+ }
}
void mpi_info_free_(int* info, int* ierr){
void mpi_type_get_name_ (int* datatype, char * name, int* len, int* ierr){
*ierr = MPI_Type_get_name(simgrid::smpi::Datatype::f2c(*datatype),name,len);
- if(*len>0)
+ if(*len>0)
name[*len]=' ';
}
void mpi_comm_get_name_ (int* comm, char* name, int* len, int* ierr){
*ierr = MPI_Comm_get_name(simgrid::smpi::Comm::f2c(*comm), name, len);
- if(*len>0)
+ if(*len>0)
name[*len]=' ';
}
xbt_os_walltimer_start(global_timer);
}
- if (xbt_cfg_get_string("smpi/comp-adjustment-file")[0] != '\0') {
+ if (xbt_cfg_get_string("smpi/comp-adjustment-file")[0] != '\0') {
std::string filename {xbt_cfg_get_string("smpi/comp-adjustment-file")};
std::ifstream fstream(filename);
if (not fstream.is_open()) {
if (MC_is_active()) {
MC_run();
} else {
-
+
SIMIX_run();
xbt_os_walltimer_stop(global_timer);
"The simulation took %g seconds (after parsing and platform setup)\n"
"%g seconds were actual computation of the application",
SIMIX_get_clock(), global_time , smpi_total_benched_time);
-
+
if (smpi_total_benched_time/global_time>=0.75)
XBT_INFO("More than 75%% of the time was spent inside the application code.\n"
"You may want to use sampling functions or trace replay to reduce this.");
}
void smpi_mpi_init() {
- if(smpi_init_sleep > 0)
+ if(smpi_init_sleep > 0)
simcall_process_sleep(smpi_init_sleep);
}
// }
// because the time will not normally advance when only calls to MPI_Wtime
// are made -> deadlock (MPI_Wtime never reaches the time limit)
- if(smpi_wtime_sleep > 0)
+ if(smpi_wtime_sleep > 0)
simcall_process_sleep(smpi_wtime_sleep);
smpi_bench_begin();
} else {
template <> int Keyval::call_deleter<Comm>(Comm* obj, smpi_key_elem elem, int keyval, void * value, int* flag){
if(elem->delete_fn.comm_delete_fn!=MPI_NULL_DELETE_FN){
int ret = elem->delete_fn.comm_delete_fn(obj, keyval, value, flag);
- if(ret!=MPI_SUCCESS)
+ if(ret!=MPI_SUCCESS)
return ret;
}
return MPI_SUCCESS;
template <> int Keyval::call_deleter<Win>(Win* obj, smpi_key_elem elem, int keyval, void * value, int* flag){
if(elem->delete_fn.win_delete_fn!=MPI_NULL_DELETE_FN){
int ret = elem->delete_fn.win_delete_fn(obj, keyval, value, flag);
- if(ret!=MPI_SUCCESS)
+ if(ret!=MPI_SUCCESS)
return ret;
}
return MPI_SUCCESS;
template <> int Keyval::call_deleter<Datatype>(Datatype* obj, smpi_key_elem elem, int keyval, void * value, int* flag){
if(elem->delete_fn.type_delete_fn!=MPI_NULL_DELETE_FN){
int ret = elem->delete_fn.type_delete_fn(obj, keyval, value, flag);
- if(ret!=MPI_SUCCESS)
+ if(ret!=MPI_SUCCESS)
return ret;
}
return MPI_SUCCESS;
smpi_copy_fn copy_fn;
smpi_delete_fn delete_fn;
int refcount;
-} s_smpi_mpi_key_elem_t;
+} s_smpi_mpi_key_elem_t;
typedef struct s_smpi_key_elem *smpi_key_elem;
protected:
std::unordered_map<int, void*>* attributes();
public:
-// Each subclass should have two members, as we want to separate the ones for Win, Comm, and Datatypes :
+// Each subclass should have two members, as we want to separate the ones for Win, Comm, and Datatypes :
// static std::unordered_map<int, smpi_key_elem> keyvals_;
// static int keyval_id_;
template <typename T> static int keyval_create(smpi_copy_fn copy_fn, smpi_delete_fn delete_fn, int* keyval, void* extra_statee);
/*
Unimplemented Calls - both PMPI and MPI calls are generated.
When implementing, please move ahead, swap UNIMPLEMENTED_WRAPPED_PMPI_CALL for WRAPPED_PMPI_CALL,
- and implement PMPI version of the function in smpi_pmpi.cpp file
+ and implement PMPI version of the function in smpi_pmpi.cpp file
*/
extra->send_size = count*dt_size_send;
TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra);
TRACE_smpi_send(rank, rank, dst_traced, tag,count*datatype->size());
-
+
simgrid::smpi::Request::ssend(buf, count, datatype, dst, tag, comm);
retval = MPI_SUCCESS;
-
+
TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__);
}
}else if (dst >= comm->group()->size() || dst <0 ||
(src!=MPI_ANY_SOURCE && (src >= comm->group()->size() || src <0))){
retval = MPI_ERR_RANK;
- } else if ((sendcount < 0 || recvcount<0) ||
+ } else if ((sendcount < 0 || recvcount<0) ||
(sendbuf==nullptr && sendcount > 0) || (recvbuf==nullptr && recvcount>0)) {
retval = MPI_ERR_COUNT;
} else if((sendtag<0 && sendtag != MPI_ANY_TAG)||(recvtag<0 && recvtag != MPI_ANY_TAG)){
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0) ||
return retval;
}
-int PMPI_Get_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
-int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+int PMPI_Get_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win){
int retval = 0;
smpi_bench_end();
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0 || result_count <0) ||
int src_traced = group->index(target_rank);
TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, nullptr);
- retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
- result_count, result_datatype, target_rank, target_disp,
+ retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
+ result_count, result_datatype, target_rank, target_disp,
target_count, target_datatype, op);
TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__);
}
-int PMPI_Rget_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
-int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+int PMPI_Rget_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request* request){
int retval = 0;
smpi_bench_end();
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if ((origin_count < 0 || target_count < 0 || result_count <0) ||
int src_traced = group->index(target_rank);
TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, nullptr);
- retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
- result_count, result_datatype, target_rank, target_disp,
+ retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
+ result_count, result_datatype, target_rank, target_disp,
target_count, target_datatype, op, request);
TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__);
retval = MPI_SUCCESS;
} else if (target_rank <0){
retval = MPI_ERR_RANK;
- } else if (win->dynamic()==0 && target_disp <0){
+ } else if (win->dynamic()==0 && target_disp <0){
//in case of dynamic window, target_disp can be mistakenly seen as negative, as it is an address
retval = MPI_ERR_ARG;
} else if (origin_addr==nullptr || result_addr==nullptr || compare_addr==nullptr){
int src_traced = group->index(target_rank);
TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, nullptr);
- retval = win->compare_and_swap( origin_addr, compare_addr, result_addr, datatype,
+ retval = win->compare_and_swap( origin_addr, compare_addr, result_addr, datatype,
target_rank, target_disp);
TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__);
smpi_bench_end();
if (win == MPI_WIN_NULL) {
retval = MPI_ERR_WIN;
- } else if (lock_type != MPI_LOCK_EXCLUSIVE &&
+ } else if (lock_type != MPI_LOCK_EXCLUSIVE &&
lock_type != MPI_LOCK_SHARED) {
retval = MPI_ERR_LOCKTYPE;
- } else if (rank == MPI_PROC_NULL){
+ } else if (rank == MPI_PROC_NULL){
retval = MPI_SUCCESS;
} else {
int myrank = smpi_process()->index();
smpi_bench_end();
if (win == MPI_WIN_NULL) {
retval = MPI_ERR_WIN;
- } else if (rank == MPI_PROC_NULL){
+ } else if (rank == MPI_PROC_NULL){
retval = MPI_SUCCESS;
} else {
int myrank = smpi_process()->index();
smpi_bench_end();
if (win == MPI_WIN_NULL) {
retval = MPI_ERR_WIN;
- } else if (rank == MPI_PROC_NULL){
+ } else if (rank == MPI_PROC_NULL){
retval = MPI_SUCCESS;
} else {
int myrank = smpi_process()->index();
smpi_bench_end();
if (win == MPI_WIN_NULL) {
retval = MPI_ERR_WIN;
- } else if (rank == MPI_PROC_NULL){
+ } else if (rank == MPI_PROC_NULL){
retval = MPI_SUCCESS;
} else {
int myrank = smpi_process()->index();
{
XBT_DEBUG("Initialize the counters");
CHECK_ACTION_PARAMS(action, 0, 1)
- if(action[2])
- MPI_DEFAULT_TYPE=MPI_DOUBLE; // default MPE dataype
+ if(action[2])
+ MPI_DEFAULT_TYPE=MPI_DOUBLE; // default MPE dataype
else MPI_DEFAULT_TYPE= MPI_BYTE; // default TAU datatype
/* start a simulated timer */
double size=parse_double(action[3]);
double clock = smpi_process()->simulated_elapsed();
- if(action[4])
+ if(action[4])
MPI_CURRENT_TYPE=decode_datatype(action[4]);
- else
+ else
MPI_CURRENT_TYPE= MPI_DEFAULT_TYPE;
int rank = smpi_process()->index();
double clock = smpi_process()->simulated_elapsed();
MPI_Status status;
- if(action[4])
+ if(action[4])
MPI_CURRENT_TYPE=decode_datatype(action[4]);
- else
+ else
MPI_CURRENT_TYPE= MPI_DEFAULT_TYPE;
int rank = smpi_process()->index();
double size=parse_double(action[3]);
double clock = smpi_process()->simulated_elapsed();
- if(action[4])
+ if(action[4])
MPI_CURRENT_TYPE=decode_datatype(action[4]);
- else
+ else
MPI_CURRENT_TYPE= MPI_DEFAULT_TYPE;
int rank = smpi_process()->index();
MPI_Request request = get_reqq_self()->back();
get_reqq_self()->pop_back();
- //if request is null here, this may mean that a previous test has succeeded
- //Different times in traced application and replayed version may lead to this
+ //if request is null here, this may mean that a previous test has succeeded
+ //Different times in traced application and replayed version may lead to this
//In this case, ignore the extra calls.
if(request!=nullptr){
int rank = smpi_process()->index();
double clock = smpi_process()->simulated_elapsed();
int root=0;
/* Initialize MPI_CURRENT_TYPE in order to decrease the number of the checks */
- MPI_CURRENT_TYPE= MPI_DEFAULT_TYPE;
+ MPI_CURRENT_TYPE= MPI_DEFAULT_TYPE;
if(action[3]) {
root= atoi(action[3]);
if(action[4])
- MPI_CURRENT_TYPE=decode_datatype(action[4]);
+ MPI_CURRENT_TYPE=decode_datatype(action[4]);
}
int rank = smpi_process()->index();
double clock = smpi_process()->simulated_elapsed();
CHECK_ACTION_PARAMS(action, 2, 2)
- int sendcount=atoi(action[2]);
- int recvcount=atoi(action[3]);
+ int sendcount=atoi(action[2]);
+ int recvcount=atoi(action[3]);
MPI_Datatype MPI_CURRENT_TYPE2 = MPI_DEFAULT_TYPE;
namespace simgrid{
namespace smpi{
-Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : buf_(buf), old_type_(datatype), src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags)
+Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : buf_(buf), old_type_(datatype), src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags)
{
void *old_buf = nullptr;
// FIXME Handle the case of a partial shared malloc.
ref->real_src_ = req->src_;
if(ref->tag_ == MPI_ANY_TAG)
ref->real_tag_ = req->tag_;
- if(ref->real_size_ < req->real_size_)
+ if(ref->real_size_ < req->real_size_)
ref->truncated_ = 1;
if(req->detached_==1)
ref->detached_sender_=req; //tie the sender to the receiver, as it is detached and has to be freed in the receiver
if (async_small_thresh == 0 && (flags_ & RMA) == 0 ) {
mailbox = process->mailbox();
- }
+ }
else if (((flags_ & RMA) != 0) || static_cast<int>(size_) < async_small_thresh) {
//We have to check both mailboxes (because SSEND messages are sent to the large mbox).
//begin with the more appropriate one : the small one.
void Request::startall(int count, MPI_Request * requests)
{
- if(requests== nullptr)
+ if(requests== nullptr)
return;
for(int i = 0; i < count; i++) {
// because the time will not normally advance when only calls to MPI_Test are made -> deadlock
// multiplier to the sleeptime, to increase speed of execution, each failed test will increase it
static int nsleeps = 1;
- if(smpi_test_sleep > 0)
+ if(smpi_test_sleep > 0)
simcall_process_sleep(nsleeps*smpi_test_sleep);
Status::empty(status);
if (not map.empty()) {
//multiplier to the sleeptime, to increase speed of execution, each failed testany will increase it
static int nsleeps = 1;
- if(smpi_test_sleep > 0)
+ if(smpi_test_sleep > 0)
simcall_process_sleep(nsleeps*smpi_test_sleep);
i = simcall_comm_testany(comms.data(), comms.size()); // The i-th element in comms matches!
if (i != -1) { // -1 is not MPI_UNDEFINED but a SIMIX return code. (nothing matches)
- *index = map[i];
+ *index = map[i];
finish_wait(&requests[*index],status);
flag = 1;
nsleeps = 1;
namespace simgrid{
namespace smpi{
-
-Topo_Graph::~Topo_Graph()
+Topo_Graph::~Topo_Graph()
{
delete[] index_;
delete[] edges_;
}
-Topo_Dist_Graph::~Topo_Dist_Graph()
+Topo_Dist_Graph::~Topo_Dist_Graph()
{
delete[] in_;
delete[] in_weights_;
/*******************************************************************************
* Cartesian topologies
******************************************************************************/
-Topo_Cart::~Topo_Cart()
+Topo_Cart::~Topo_Cart()
{
delete[] dims_;
delete[] periodic_;
Topo_Cart::Topo_Cart(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) : Topo_Cart(ndims) {
MPI_Group newGroup;
MPI_Group oldGroup;
- int nranks;
int rank = comm_old->rank();
- int newSize = 1;
if(ndims != 0) {
+ int newSize = 1;
for (int i = 0 ; i < ndims ; i++) {
newSize *= dims[i];
}
nnodes_ = newSize;
// FIXME : code duplication... See coords
- nranks = newSize;
+ int nranks = newSize;
for (int i=0; i<ndims; i++) {
dims_[i] = dims[i];
periodic_[i] = periods[i];
Topo_Cart* Topo_Cart::sub(const int remain_dims[], MPI_Comm *newcomm) {
int oldNDims = ndims_;
- int j = 0;
int *newDims = nullptr;
int *newPeriodic = nullptr;
newPeriodic = xbt_new(int, newNDims);
// that should not segfault
+ int j = 0;
for (int i = 0 ; j < newNDims ; i++) {
if(remain_dims[i]) {
newDims[j] =dims_[i];
int Topo_Cart::rank(int* coords, int* rank) {
int ndims =ndims_;
- int coord;
*rank = 0;
int multiplier = 1;
for (int i=ndims-1; i >=0; i-- ) {
- coord = coords[i];
+ int coord = coords[i];
/* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
* periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
- * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
- * reserved.
+ * reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* $HEADER$
*/
-
/*
* This is a utility function, no need to have anything in the lower layer for this at all
*/
comm_->group()->index(target_rank), smpi_process()->index(), SMPI_RMA_TAG+2, comm_,
MPI_OP_NULL);
- //start the send, with another process than us as sender.
+ //start the send, with another process than us as sender.
sreq->start();
//push request to receiver's win
xbt_mutex_acquire(send_win->mut_);
if(locked != 1)
return MPI_ERR_WIN;
}
- //FIXME: local version
+ //FIXME: local version
if(target_count*target_datatype->get_extent()>recv_win->size_)
return MPI_ERR_ARG;
return MPI_SUCCESS;
}
-int Win::get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
- int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+int Win::get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+ int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Request* request){
//get sender pointer
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Request* request=nullptr);
int accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Request* request=nullptr);
- int get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
- int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+ int get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+ int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Request* request=nullptr);
int compare_and_swap(void *origin_addr, void *compare_addr,
void *result_addr, MPI_Datatype datatype, int target_rank,
XBT_DEBUG("Fair bottleneck Initialized");
- /*
+ /*
* Compute Usage and store the variables that reach the maximum.
*/
do {
void lagrange_solve(lmm_system_t sys);
//computes the value of the dichotomy using a initial values, init, with a specific variable or constraint
static double dichotomy(double init, double diff(double, void *), void *var_cnst, double min_error);
-//computes the value of the differential of constraint param_cnst applied to lambda
+//computes the value of the differential of constraint param_cnst applied to lambda
static double partial_diff_lambda(double lambda, void *param_cnst);
static int __check_feasible(xbt_swag_t cnst_list, xbt_swag_t var_list, int warn)
XBT_DEBUG("#### cnst(%p)->lambda : %e", cnst, cnst->lambda);
}
- /*
- * Initialize the var list variable with only the active variables.
+ /*
+ * Initialize the var list variable with only the active variables.
* Associate an index in the swag variables. Initialize mu.
*/
var_list = &(sys->variable_set);
*
* @param init initial value for \mu or \lambda
* @param diff a function that computes the differential of with respect a \mu or \lambda
- * @param var_cnst a pointer to a variable or constraint
+ * @param var_cnst a pointer to a variable or constraint
* @param min_erro a minimum error tolerated
*
* @return a double corresponding to the result of the dichotomy process
// Initialize the summation variable
sigma_i = 0.0;
- // Compute sigma_i
+ // Compute sigma_i
for (j = 0; j < var->cnsts_number; j++) {
sigma_i += (var->cnsts[j].constraint)->lambda;
}
}
/** \brief Attribute the value bound to var->bound.
- *
+ *
* \param func_fpi inverse of the partial differential of f (f prime inverse, (f')^{-1})
- *
+ *
* Set default functions to the ones passed as parameters. This is a polymorphism in C pure, enjoy the roots of
* programming.
*
return (cnst->sharing_policy);
}
-/* @brief Remove a constraint
+/* @brief Remove a constraint
* Currently this is dead code, but it is exposed in maxmin.h
- * Apparently, this call was designed assuming that constraint would no more have elements in it.
+ * Apparently, this call was designed assuming that constraint would no more have elements in it.
* If not the case, assertion will fail, and you need to add calls e.g. to lmm_shrink before effectively removing it.
*/
inline void lmm_constraint_free(lmm_system_t sys,lmm_constraint_t cnst)
make_constraint_inactive(sys, cnst);
else {
//Check maxconcurrency to see if we can enable new variables
- lmm_on_disabled_var(sys,elem->constraint);
+ lmm_on_disabled_var(sys,elem->constraint);
}
lmm_check_concurrency(sys);
}
}
- //Check if we need to disable the variable
+ //Check if we need to disable the variable
if(var->weight>0 && var->concurrency_share-current_share>lmm_concurrency_slack(cnst)) {
double weight = var->weight;
lmm_disable_var(sys,var);
if (*elem == nullptr)
*elem = (lmm_element_t) xbt_swag_getFirst(&(cnst->disabled_element_set));
} else {
- *elem = (lmm_element_t) xbt_swag_getNext(*elem, cnst->disabled_element_set.offset);
+ *elem = (lmm_element_t) xbt_swag_getNext(*elem, cnst->disabled_element_set.offset);
}
}
if (*elem)
if (*nextelem == nullptr)
*nextelem = (lmm_element_t) xbt_swag_getFirst(&(cnst->disabled_element_set));
} else {
- *nextelem = (lmm_element_t) xbt_swag_getNext(*elem, cnst->disabled_element_set.offset);
+ *nextelem = (lmm_element_t) xbt_swag_getNext(*elem, cnst->disabled_element_set.offset);
}
return (*elem)->variable;
}else
static inline void saturated_variable_set_update(s_lmm_constraint_light_t *cnst_light_tab,
dyn_light_t saturated_constraint_set, lmm_system_t sys)
{
- /* Add active variables (i.e. variables that need to be set) from the set of constraints to saturate (cnst_light_tab)*/
+ /* Add active variables (i.e. variables that need to be set) from the set of constraints to saturate (cnst_light_tab)*/
lmm_constraint_light_t cnst = nullptr;
void *_elem;
lmm_element_t elem = nullptr;
std::to_string(elem->variable->value) + ")" + ((cnst->sharing_policy) ? " + " : " , ");
if(cnst->sharing_policy)
sum += elem->value * elem->variable->value;
- else
+ else
sum = MAX(sum,elem->value * elem->variable->value);
}
//TODO: Adding disabled elements only for test compatibility, but do we really want them to be printed?
std::to_string(elem->variable->value) + ")" + ((cnst->sharing_policy) ? " + " : " , ");
if(cnst->sharing_policy)
sum += elem->value * elem->variable->value;
- else
+ else
sum = MAX(sum,elem->value * elem->variable->value);
}
}
/** \brief Attribute the value bound to var->bound.
- *
+ *
* \param sys the lmm_system_t
* \param var the lmm_variable_t
* \param bound the new bound to associate with var
- *
+ *
* Makes var->bound equal to bound. Whenever this function is called a change is signed in the system. To
* avoid false system changing detection it is a good idea to test (bound != 0) before calling it.
*/
if(cnstr->concurrency_limit<0)
return 666;
- return cnstr->concurrency_limit - cnstr->concurrency_current;
+ return cnstr->concurrency_limit - cnstr->concurrency_current;
}
/** \brief Measure the minimum concurrency slack across all constraints where the given var is involved */
/* /Check if a variable can be enabled
*
- * Make sure to set staged_weight before, if your intent is only to check concurrency
+ * Make sure to set staged_weight before, if your intent is only to check concurrency
*/
int lmm_can_enable_var(lmm_variable_t var){
return var->staged_weight>0 && lmm_cnstrs_min_concurrency_slack(var)>=var->concurrency_share;
//When used within lmm_on_disabled_var, we would get an assertion fail, because transiently there can be variables
// that are staged and could be activated.
- //Anyway, caller functions all call lmm_check_concurrency() in the end.
+ //Anyway, caller functions all call lmm_check_concurrency() in the end.
// lmm_check_concurrency(sys);
}
var->value = 0.0;
lmm_check_concurrency(sys);
}
-
+
/* /brief Find variables that can be enabled and enable them.
- *
+ *
* Assuming that the variable has already been removed from non-zero weights
* Can we find a staged variable to add?
* If yes, check that none of the constraints that this variable is involved in is at the limit of its concurrency
//We could get an assertion fail, because transiently there can be variables that are staged and could be activated.
//And we need to go through all constraints of the disabled var before getting back a coherent state.
- //Anyway, caller functions all call lmm_check_concurrency() in the end.
+ //Anyway, caller functions all call lmm_check_concurrency() in the end.
// lmm_check_concurrency(sys);
}
lmm_update_modified_set_rec(sys, cnsts[i].constraint);
}
}
- //var will be ignored in later visits as long as sys->visited_counter does not move
+ //var will be ignored in later visits as long as sys->visited_counter does not move
var->visited = sys->visited_counter;
}
}
void *_var;
xbt_swag_foreach(_var, &sys->variable_set)
((lmm_variable_t)_var)->visited = 0;
- }
+ }
xbt_swag_reset(&sys->modified_constraint_set);
}
* @brief LMM element
* Elements can be seen as glue between constraint objects and variable objects.
* Basically, each variable will have a set of elements, one for each constraint where it is involved.
- * Then, it is used to list all variables involved in constraint through constraint's xxx_element_set lists, or vice-versa list all constraints for a given variable.
+ * Then, it is used to list all variables involved in constraint through constraint's xxx_element_set lists, or vice-versa list all constraints for a given variable.
*/
typedef struct lmm_element {
/* hookup to constraint */
/** @ingroup SURF_lmm
* @brief LMM constraint
- * Each constraint contains several partially overlapping logical sets of elements:
+ * Each constraint contains several partially overlapping logical sets of elements:
* \li Disabled elements which variable's weight is zero. This variables are not at all processed by LMM, but eventually the corresponding action will enable it (at least this is the idea).
* \li Enabled elements which variable's weight is non-zero. They are utilized in some LMM functions.
- * \li Active elements which variable's weight is non-zero (i.e. it is enabled) AND its element value is non-zero. LMM_solve iterates over active elements during resolution, dynamically making them active or unactive.
- *
+ * \li Active elements which variable's weight is non-zero (i.e. it is enabled) AND its element value is non-zero. LMM_solve iterates over active elements during resolution, dynamically making them active or unactive.
+ *
*/
typedef struct lmm_constraint {
/* hookup to system */
double usage;
double bound;
int concurrency_limit; /* The maximum number of variables that may be enabled at any time (stage variables if necessary) */
- //TODO MARTIN Check maximum value across resources at the end of simulation and give a warning is more than e.g. 500
+ //TODO MARTIN Check maximum value across resources at the end of simulation and give a warning is more than e.g. 500
int concurrency_current; /* The current concurrency */
int concurrency_maximum; /* The maximum number of (enabled and disabled) variables associated to the constraint at any given time (essentially for tracing)*/
-
+
int sharing_policy; /* see @e_surf_link_sharing_policy_t (0: FATPIPE, 1: SHARED, 2: FULLDUPLEX) */
void *id;
int id_int;
/** @ingroup SURF_lmm
* @brief LMM variable
- *
+ *
* When something prevents us from enabling a variable, we "stage" the weight that we would have like to set, so that as soon as possible we enable the variable with desired weight
*/
typedef struct lmm_variable {
/** @ingroup SURF_lmm
* @brief Print information about a lmm system
- *
+ *
* @param sys A lmm system
*/
//XBT_PRIVATE void lmm_print(lmm_system_t sys);
double finish_time = surf_get_clock();
double cpu_load;
double current_speed = host->speed();
- if (current_speed <= 0)
- // Some users declare a pstate of speed 0 flops (e.g., to model boot time).
- // We consider that the machine is then fully loaded. That's arbitrary but it avoids a NaN
- cpu_load = 1;
- else
- cpu_load = lmm_constraint_get_usage(host->pimpl_cpu->constraint()) / current_speed;
-
- /** Divide by the number of cores here **/
- cpu_load /= host->pimpl_cpu->coreCount();
-
- if (cpu_load > 1) // A machine with a load > 1 consumes as much as a fully loaded machine, not more
- cpu_load = 1;
-
- /* The problem with this model is that the load is always 0 or 1, never something less.
- * Another possibility could be to model the total energy as
- *
- * X/(X+Y)*W_idle + Y/(X+Y)*W_burn
- *
- * where X is the amount of idling cores, and Y the amount of computing cores.
- */
- double previous_energy = this->total_energy;
+ if (start_time < finish_time) {
+ // We may have start == finish if the past consumption was updated since the simcall was started
+ // for example if 2 actors requested to update the same host's consumption in a given scheduling round.
+ //
+ // Even in this case, we need to save the pstate for the next call (after this big if),
+ // which may have changed since that recent update.
+
+ if (current_speed <= 0)
+ // Some users declare a pstate of speed 0 flops (e.g., to model boot time).
+ // We consider that the machine is then fully loaded. That's arbitrary but it avoids a NaN
+ cpu_load = 1;
+ else
+ cpu_load = lmm_constraint_get_usage(host->pimpl_cpu->constraint()) / current_speed;
+
+ /** Divide by the number of cores here **/
+ cpu_load /= host->pimpl_cpu->coreCount();
- double instantaneous_consumption;
- if (this->pstate == -1) // The host was off at the beginning of this time interval
- instantaneous_consumption = this->watts_off;
- else
- instantaneous_consumption = this->getCurrentWattsValue(cpu_load);
+ if (cpu_load > 1) // A machine with a load > 1 consumes as much as a fully loaded machine, not more
+ cpu_load = 1;
- double energy_this_step = instantaneous_consumption * (finish_time - start_time);
+ /* The problem with this model is that the load is always 0 or 1, never something less.
+ * Another possibility could be to model the total energy as
+ *
+ * X/(X+Y)*W_idle + Y/(X+Y)*W_burn
+ *
+ * where X is the amount of idling cores, and Y the amount of computing cores.
+ */
+
+ double previous_energy = this->total_energy;
+
+ double instantaneous_consumption;
+ if (this->pstate == -1) // The host was off at the beginning of this time interval
+ instantaneous_consumption = this->watts_off;
+ else
+ instantaneous_consumption = this->getCurrentWattsValue(cpu_load);
- //TODO Trace: Trace energy_this_step from start_time to finish_time in host->name()
+ double energy_this_step = instantaneous_consumption * (finish_time - start_time);
- this->total_energy = previous_energy + energy_this_step;
- this->last_updated = finish_time;
+ // TODO Trace: Trace energy_this_step from start_time to finish_time in host->name()
- XBT_DEBUG(
- "[update_energy of %s] period=[%.2f-%.2f]; current power peak=%.0E flop/s; consumption change: %.2f J -> %.2f J",
- host->cname(), start_time, finish_time, host->pimpl_cpu->speed_.peak, previous_energy, energy_this_step);
+ this->total_energy = previous_energy + energy_this_step;
+ this->last_updated = finish_time;
+
+ XBT_DEBUG("[update_energy of %s] period=[%.2f-%.2f]; current power peak=%.0E flop/s; consumption change: %.2f J -> "
+ "%.2f J",
+ host->cname(), start_time, finish_time, host->pimpl_cpu->speed_.peak, previous_energy, energy_this_step);
+ }
/* Save data for the upcoming time interval: whether it's on/off and the pstate if it's on */
this->pstate = host->isOn() ? host->pstate() : -1;
simgrid::surf::CpuAction::onStateChange.connect(&onActionStateChange);
}
+/** @brief updates the consumption of all hosts
+ *
+ * After this call, sg_host_get_consumed_energy() will not interrupt your process
+ * (until after the next clock update).
+ */
+void sg_host_energy_update_all()
+{
+ simgrid::simix::kernelImmediate([]() {
+ std::vector<simgrid::s4u::Host*> list;
+ simgrid::s4u::Engine::instance()->hostList(&list);
+ for (auto host : list)
+ host->extension<HostEnergy>()->update();
+ });
+}
+
/** @brief Returns the total energy consumed by the host so far (in Joules)
+ *
+ * Please note that since the consumption is lazily updated, it may require a simcall to update it.
+ * The result is that the actor requesting this value will be interrupted,
+ * the value will be updated in kernel mode before returning the control to the requesting actor.
*
* See also @ref SURF_plugin_energy.
*/
surf_cpu_model_pm = new CpuL07Model(this,maxminSystem_);
}
-HostL07Model::~HostL07Model()
+HostL07Model::~HostL07Model()
{
lmm_system_free(maxminSystem_);
maxminSystem_ = nullptr;
host->pimpl_cpu->setPState(args->pstate);
if (args->coord && strcmp(args->coord, ""))
new simgrid::kernel::routing::vivaldi::Coords(host->pimpl_netpoint, args->coord);
-
+
}
/** @brief Add a "router" to the network element list */
xbt_assert(current_routing, "Cannot seal the current AS: none under construction");
current_routing->seal();
simgrid::s4u::NetZone::onSeal(*current_routing);
- current_routing = static_cast<simgrid::kernel::routing::NetZoneImpl*>(current_routing->father());
+ current_routing = static_cast<simgrid::kernel::routing::NetZoneImpl*>(current_routing->father());
}
/** @brief Add a link connecting an host to the rest of its AS (which must be cluster or vivaldi) */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
if ( *p == '\n' )\
--surf_parse_lineno;\
}while(0)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
/* Table of booleans, true if rule could match eol. */
static yyconst flex_int32_t yy_rule_can_match_eol[648] =
{ 0,
-0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
- 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
- 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
- 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
- 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
- 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
- 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
+0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
+ 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, };
static yy_state_type yy_last_accepting_state;
* FleXML is Copyright (C) 1999-2005 Kristoffer Rose. All rights reserved.
* FleXML is Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
* (1.9.6).
- *
+ *
* There are two, intertwined parts to this program, part A and part B.
*
* Part A
* ------
- *
- * Some parts, here collectively called "Part A", are found in the
+ *
+ * Some parts, here collectively called "Part A", are found in the
* FleXML package. They are Copyright (C) 1999-2005 Kristoffer Rose
* and Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
*
* Notice that these are explicit rights granted to you for files
* generated by the FleXML system. For your rights in connection with
* the FleXML system itself please consult the GNU General Public License.
- *
+ *
* Part B
* ------
- *
- * The other parts, here collectively called "Part B", and which came
- * from the DTD used by FleXML to generate this program, can be
+ *
+ * The other parts, here collectively called "Part B", and which came
+ * from the DTD used by FleXML to generate this program, can be
* distributed (or not, as the case may be) under the terms of whoever
- * wrote them, provided these terms respect and obey the two conditions
+ * wrote them, provided these terms respect and obey the two conditions
* above under the heading "Part A".
*
* The author of and contributors to FleXML specifically disclaim
- * any copyright interest in "Part B", unless "Part B" was written
+ * any copyright interest in "Part B", unless "Part B" was written
* by the author of or contributors to FleXML.
- *
+ *
*/
/* Version strings. */
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
-
+
#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__TOS_WIN__)
# ifndef __STRICT_ANSI__
# include <io.h>
#else
# include <unistd.h>
#endif
-
+
#ifndef FLEXML_INDEXSTACKSIZE
#define FLEXML_INDEXSTACKSIZE 1000
#endif
{
ck_ilimit();
indexstack[inext++] = p;
- indexstack[inext++] = bnext;
+ indexstack[inext++] = bnext;
}
static int popbuffer(void)
#endif
#ifndef YY_NO_UNPUT
-
+
#endif
#ifndef yytext_ptr
static int yy_start_stack_ptr = 0;
static int yy_start_stack_depth = 0;
static int *yy_start_stack = NULL;
-
+
static void yy_push_state (int _new_state );
-
+
static void yy_pop_state (void );
-
+
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
#ifdef __ia64__
yy_state_type yy_current_state;
char *yy_cp, *yy_bp;
int yy_act;
-
+
if ( !(yy_init) )
{
(yy_init) = 1;
unsigned int yyl;
for ( yyl = 0; yyl < surf_parse_leng; ++yyl )
if ( surf_parse_text[yyl] == '\n' )
-
+
surf_parse_lineno++;
;
}
case 11:
/* rule 11 can match eol */
YY_RULE_SETUP
-SET(DOCTYPE);
+SET(DOCTYPE);
YY_BREAK
case 12:
/* rule 12 can match eol */
if(!ETag_surfxml_include_state()) FAIL("Premature EOF: `</peer>' expected.");
YY_BREAK
-/* <!--
+/* <!--
* DTD of SimGrid platform and deployment files.
* More info: http://simgrid.gforge.inria.fr/simgrid/latest/doc/platform.html
* To upgrade your files, use the tool simgrid_update_xml
* - Rename (power->speed) the attributes describing the amount of flop
* that a <host>, <peer>, <cluster> or <cabinet> can deliver per second.
* - In <trace_connect>, attribute kind="POWER" is now kind="SPEED".
- *
+ *
* - In <host> and <link>, attributes availability and state are gone.
* It was redundent with state and availability traces, and with peak values.
- *
- * - In <cluster>, cannot set the availability nor state traces.
+ *
+ * - In <cluster>, cannot set the availability nor state traces.
* This was too complex and unused.
* - The DOCTYPE points to the right URL (this file):
* http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd
- *
+ *
* - Kill <gpu>. Was not doing anything.
* - A warning is emitted for unit-less values (they are still accepted).
* - speed. Default: 'f' or 'flops'. Also defined:
* - PLATFORM_DESCRIPTION -> PLATFORM
* * New in DTD version 1 (in SimGrid 3.3):
* - DTD is now versionned with the version attribute of platform
- * - Unit change:
+ * - Unit change:
* - Link bandwidth: from Mb/s to b/s
* - CPU speed: from MFlop/s to Flop/s
* --> */
{
yy_state_type yy_current_state;
char *yy_cp;
-
+
yy_current_state = (yy_start);
for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
{
int c;
-
+
*(yy_c_buf_p) = (yy_hold_char);
if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
(yy_hold_char) = *++(yy_c_buf_p);
if ( c == '\n' )
-
+
surf_parse_lineno++;
;
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void surf_parse_restart (FILE * input_file )
{
-
+
if ( ! YY_CURRENT_BUFFER ){
surf_parse_ensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
void surf_parse__switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
-
+
/* TODO. We should be able to replace this entire function body
* with
* surf_parse_pop_buffer_state();
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
YY_BUFFER_STATE surf_parse__create_buffer (FILE * file, int size )
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) surf_parse_alloc(sizeof( struct yy_buffer_state ) );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in surf_parse__create_buffer()" );
/** Destroy the buffer.
* @param b a buffer created with surf_parse__create_buffer()
- *
+ *
*/
void surf_parse__delete_buffer (YY_BUFFER_STATE b )
{
-
+
if ( ! b )
return;
{
int oerrno = errno;
-
+
surf_parse__flush_buffer(b );
b->yy_input_file = file;
}
b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
-
+
errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void surf_parse__flush_buffer (YY_BUFFER_STATE b )
{
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void surf_parse_push_buffer_state (YY_BUFFER_STATE new_buffer )
{
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void surf_parse_pop_buffer_state (void)
{
static void surf_parse_ensure_buffer_stack (void)
{
int num_to_alloc;
-
+
if (!(yy_buffer_stack)) {
/* First allocation is just for 2 elements, since we don't know if this
/** Setup the input buffer state to scan directly from a user-specified character buffer.
* @param base the character buffer
* @param size the size in bytes of the character buffer
- *
+ *
* @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE surf_parse__scan_buffer (char * base, yy_size_t size )
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
/** Setup the input buffer state to scan a string. The next call to surf_parse_lex() will
* scan from a @e copy of @a str.
* @param yystr a NUL-terminated string to scan
- *
+ *
* @return the newly allocated buffer state object.
* @note If you want to scan bytes that may contain NUL values, then use
* surf_parse__scan_bytes() instead.
*/
YY_BUFFER_STATE surf_parse__scan_string (yyconst char * yystr )
{
-
+
return surf_parse__scan_bytes(yystr,(int) strlen(yystr) );
}
* scan from a @e copy of @a bytes.
* @param yybytes the byte buffer to scan
* @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- *
+ *
* @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE surf_parse__scan_bytes (yyconst char * yybytes, int _yybytes_len )
char *buf;
yy_size_t n;
int i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = (yy_size_t) (_yybytes_len + 2);
buf = (char *) surf_parse_alloc(n );
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int surf_parse_get_lineno (void)
{
-
+
return surf_parse_lineno;
}
/** Get the input stream.
- *
+ *
*/
FILE *surf_parse_get_in (void)
{
}
/** Get the output stream.
- *
+ *
*/
FILE *surf_parse_get_out (void)
{
}
/** Get the length of the current token.
- *
+ *
*/
int surf_parse_get_leng (void)
{
}
/** Get the current token.
- *
+ *
*/
char *surf_parse_get_text (void)
/** Set the current line number.
* @param _line_number line number
- *
+ *
*/
void surf_parse_set_lineno (int _line_number )
{
-
+
surf_parse_lineno = _line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param _in_str A readable stream.
- *
+ *
* @see surf_parse__switch_to_buffer
*/
void surf_parse_set_in (FILE * _in_str )
/* We do not touch surf_parse_lineno unless the option is enabled. */
surf_parse_lineno = 1;
-
+
(yy_buffer_stack) = NULL;
(yy_buffer_stack_top) = 0;
(yy_buffer_stack_max) = 0;
/* surf_parse_lex_destroy is for both reentrant and non-reentrant scanners. */
int surf_parse_lex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
surf_parse__delete_buffer(YY_CURRENT_BUFFER );
#ifndef yytext_ptr
static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
{
-
+
int i;
for ( i = 0; i < n; ++i )
s1[i] = s2[i];
void *surf_parse_realloc (void * ptr, yy_size_t size )
{
-
+
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* FleXML is Copyright (C) 1999-2005 Kristoffer Rose. All rights reserved.
* FleXML is Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
* (1.9.6).
- *
+ *
* There are two, intertwined parts to this program, part A and part B.
*
* Part A
* ------
- *
- * Some parts, here collectively called "Part A", are found in the
+ *
+ * Some parts, here collectively called "Part A", are found in the
* FleXML package. They are Copyright (C) 1999-2005 Kristoffer Rose
* and Copyright (C) 2003-2013 Martin Quinson. All rights reserved.
*
* Notice that these are explicit rights granted to you for files
* generated by the FleXML system. For your rights in connection with
* the FleXML system itself please consult the GNU General Public License.
- *
+ *
* Part B
* ------
- *
- * The other parts, here collectively called "Part B", and which came
- * from the DTD used by FleXML to generate this program, can be
+ *
+ * The other parts, here collectively called "Part B", and which came
+ * from the DTD used by FleXML to generate this program, can be
* distributed (or not, as the case may be) under the terms of whoever
- * wrote them, provided these terms respect and obey the two conditions
+ * wrote them, provided these terms respect and obey the two conditions
* above under the heading "Part A".
*
* The author of and contributors to FleXML specifically disclaim
- * any copyright interest in "Part B", unless "Part B" was written
+ * any copyright interest in "Part B", unless "Part B" was written
* by the author of or contributors to FleXML.
- *
+ *
*/
#ifndef _FLEXML_simgrid_H
state->type = type;
state->id = xbt_strdup(id);
state->in = xbt_dynar_new(sizeof(xbt_automaton_transition_t), xbt_automaton_transition_free_voidp);
- state->out = xbt_dynar_new(sizeof(xbt_automaton_transition_t), xbt_automaton_transition_free_voidp);
+ state->out = xbt_dynar_new(sizeof(xbt_automaton_transition_t), xbt_automaton_transition_free_voidp);
xbt_dynar_push(a->states, &state);
return state;
}
if(xbt_automaton_state_compare(((xbt_automaton_transition_t)t1)->src, ((xbt_automaton_transition_t)t2)->src))
return 1;
-
+
if(xbt_automaton_state_compare(((xbt_automaton_transition_t)t1)->dst, ((xbt_automaton_transition_t)t2)->dst))
return 1;
return 1;
return 0;
-
+
}
int xbt_automaton_exp_label_compare(xbt_automaton_exp_label_t l1, xbt_automaton_exp_label_t l2){
return 1;
switch(l1->type){
- case 0 : // OR
+ case 0 : // OR
case 1 : // AND
if(xbt_automaton_exp_label_compare(l1->u.or_and.left_exp, l2->u.or_and.left_exp))
return 1;
int* iptr2 = xbt_dynar_get_ptr(s2, cursor);
if(*iptr1 != *iptr2)
return 1;
- }
+ }
return 0;
}
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
+typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
#define YY_LESS_LINENO(n)
#define YY_LINENO_REWIND_TO(ptr)
-
+
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
#include <stdio.h>
#include "parserPromela.tab.hacc"
-
+
extern YYSTYPE yylval;
-
+
#line 548 "automaton_lexer.yy.c"
#define INITIAL 0
#endif
#ifndef YY_NO_UNPUT
-
+
static void yyunput (int c,char *buf_ptr );
-
+
#endif
#ifndef yytext_ptr
yy_state_type yy_current_state;
char *yy_cp, *yy_bp;
int yy_act;
-
+
if ( !(yy_init) )
{
(yy_init) = 1;
case 19:
YY_RULE_SETUP
#line 63 "parserPromela.lex"
-{ sscanf(xbt_automaton_parser_text,"%lf",&yylval.real);
+{ sscanf(xbt_automaton_parser_text,"%lf",&yylval.real);
return (LITT_REEL); }
YY_BREAK
case 20:
YY_RULE_SETUP
#line 66 "parserPromela.lex"
-{ sscanf(xbt_automaton_parser_text,"%d",&yylval.integer);
+{ sscanf(xbt_automaton_parser_text,"%d",&yylval.integer);
return (LITT_ENT); }
YY_BREAK
case 21:
YY_RULE_SETUP
#line 69 "parserPromela.lex"
{ yylval.string=(char *)malloc(strlen(xbt_automaton_parser_text)+1);
- sscanf(xbt_automaton_parser_text,"%s",yylval.string);
+ sscanf(xbt_automaton_parser_text,"%s",yylval.string);
return (LITT_CHAINE); }
YY_BREAK
case 22:
{
yy_state_type yy_current_state;
char *yy_cp;
-
+
yy_current_state = (yy_start);
for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
static void yyunput (int c, char * yy_bp )
{
char *yy_cp;
-
+
yy_cp = (yy_c_buf_p);
/* undo effects of setting up xbt_automaton_parser_text */
{
int c;
-
+
*(yy_c_buf_p) = (yy_hold_char);
if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void xbt_automaton_parser_restart (FILE * input_file )
{
-
+
if ( ! YY_CURRENT_BUFFER ){
xbt_automaton_parser_ensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
void xbt_automaton_parser__switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
-
+
/* TODO. We should be able to replace this entire function body
* with
* xbt_automaton_parser_pop_buffer_state();
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
YY_BUFFER_STATE xbt_automaton_parser__create_buffer (FILE * file, int size )
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) xbt_automaton_parser_alloc(sizeof( struct yy_buffer_state ) );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in xbt_automaton_parser__create_buffer()" );
/** Destroy the buffer.
* @param b a buffer created with xbt_automaton_parser__create_buffer()
- *
+ *
*/
void xbt_automaton_parser__delete_buffer (YY_BUFFER_STATE b )
{
-
+
if ( ! b )
return;
{
int oerrno = errno;
-
+
xbt_automaton_parser__flush_buffer(b );
b->yy_input_file = file;
}
b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
-
+
errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void xbt_automaton_parser__flush_buffer (YY_BUFFER_STATE b )
{
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void xbt_automaton_parser_push_buffer_state (YY_BUFFER_STATE new_buffer )
{
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void xbt_automaton_parser_pop_buffer_state (void)
{
static void xbt_automaton_parser_ensure_buffer_stack (void)
{
yy_size_t num_to_alloc;
-
+
if (!(yy_buffer_stack)) {
/* First allocation is just for 2 elements, since we don't know if this
);
if ( ! (yy_buffer_stack) )
YY_FATAL_ERROR( "out of dynamic memory in xbt_automaton_parser_ensure_buffer_stack()" );
-
+
memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
(yy_buffer_stack_max) = num_to_alloc;
(yy_buffer_stack_top) = 0;
return;
/** Setup the input buffer state to scan directly from a user-specified character buffer.
* @param base the character buffer
* @param size the size in bytes of the character buffer
- *
- * @return the newly allocated buffer state object.
+ *
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE xbt_automaton_parser__scan_buffer (char * base, yy_size_t size )
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
/** Setup the input buffer state to scan a string. The next call to xbt_automaton_parser_lex() will
* scan from a @e copy of @a str.
* @param yystr a NUL-terminated string to scan
- *
+ *
* @return the newly allocated buffer state object.
* @note If you want to scan bytes that may contain NUL values, then use
* xbt_automaton_parser__scan_bytes() instead.
*/
YY_BUFFER_STATE xbt_automaton_parser__scan_string (yyconst char * yystr )
{
-
+
return xbt_automaton_parser__scan_bytes(yystr,strlen(yystr) );
}
* scan from a @e copy of @a bytes.
* @param yybytes the byte buffer to scan
* @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- *
+ *
* @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE xbt_automaton_parser__scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
char *buf;
yy_size_t n;
yy_size_t i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
buf = (char *) xbt_automaton_parser_alloc(n );
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int xbt_automaton_parser_get_lineno (void)
{
-
+
return xbt_automaton_parser_lineno;
}
/** Get the input stream.
- *
+ *
*/
FILE *xbt_automaton_parser_get_in (void)
{
}
/** Get the output stream.
- *
+ *
*/
FILE *xbt_automaton_parser_get_out (void)
{
}
/** Get the length of the current token.
- *
+ *
*/
yy_size_t xbt_automaton_parser_get_leng (void)
{
}
/** Get the current token.
- *
+ *
*/
char *xbt_automaton_parser_get_text (void)
/** Set the current line number.
* @param _line_number line number
- *
+ *
*/
void xbt_automaton_parser_set_lineno (int _line_number )
{
-
+
xbt_automaton_parser_lineno = _line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param _in_str A readable stream.
- *
+ *
* @see xbt_automaton_parser__switch_to_buffer
*/
void xbt_automaton_parser_set_in (FILE * _in_str )
/* xbt_automaton_parser_lex_destroy is for both reentrant and non-reentrant scanners. */
int xbt_automaton_parser_lex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
xbt_automaton_parser__delete_buffer(YY_CURRENT_BUFFER );
#ifndef yytext_ptr
static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
{
-
+
int i;
for ( i = 0; i < n; ++i )
s1[i] = s2[i];
void *xbt_automaton_parser_realloc (void * ptr, yy_size_t size )
{
-
+
/* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
new_state(id, 0);
xbt_automaton_state_t state_dst = xbt_automaton_state_exists(parsed_automaton, id);
xbt_automaton_state_t state_src = xbt_automaton_state_exists(parsed_automaton, state_id_src);
-
+
//xbt_transition_t trans = NULL;
xbt_automaton_transition_new(parsed_automaton, state_src, state_dst, label);
*
* Beware that if your dynar contains pointed values (such as strings) instead of scalar, this function is probably not
* what you want. Check the documentation of xbt_dynar_search() for more info.
- *
+ *
* Note that usually, the dynar indices are unsigned integers. If you have more than 2 million elements in your dynar,
* this very function will not work (but the other will).
*/
return -1;
}
-/** @brief Returns a boolean indicating whether the element is part of the dynar
+/** @brief Returns a boolean indicating whether the element is part of the dynar
*
* Beware that if your dynar contains pointed values (such as strings) instead of scalar, this function is probably not
* what you want. Check the documentation of xbt_dynar_search() for more info.
}
}
-/** @brief Transform a dynar into a nullptr terminated array.
+/** @brief Transform a dynar into a nullptr terminated array.
*
* \param dynar the dynar to transform
* \return pointer to the first element of the array
XBT_LOG_CONNECT(s4u_channel);
XBT_LOG_CONNECT(s4u_comm);
XBT_LOG_CONNECT(s4u_file);
-
+
/* sg */
XBT_LOG_CONNECT(sg_host);
XBT_LOG_CONNECT(surf_trace);
XBT_LOG_CONNECT(surf_vm);
XBT_LOG_CONNECT(surf_host);
-
+
#endif /* simgrid_EXPORTS */
}
If FD is a valid file descriptor for an open file then data for the
mmalloc managed region is mapped to that file, otherwise an anonymous
map is used if supported by the underlying OS. In case of running in
- an OS without support of anonymous mappings then "/dev/zero" is used
+ an OS without support of anonymous mappings then "/dev/zero" is used
and in both cases the data will not exist in any filesystem object.
If the open file corresponding to FD is from a previous use of
the current process as the process that created the file (ignoring
the BASEADDR parameter).
- For non valid FDs or empty files ones the mapping is established
- starting at the specified address BASEADDR in the process address
+ For non valid FDs or empty files ones the mapping is established
+ starting at the specified address BASEADDR in the process address
space.
The provided BASEADDR should be choosed carefully in order to avoid
mdp->next_mdesc = NULL;
mdp->refcount = 1;
mdp->options = options;
-
+
/* If we have not been passed a valid open file descriptor for the file
to map to, then we go for an anonymous map */
abort();
}
- /* Add the new heap to the linked list of heaps attached by mmalloc */
+ /* Add the new heap to the linked list of heaps attached by mmalloc */
if(__mmalloc_default_mdp){
mdp = __mmalloc_default_mdp;
while(mdp->next_mdesc)
ssize_t frag = ((uintptr_t) (ADDR2UINT (ptr) % (BLOCKSIZE))) >> heap->heapinfo[block].type;
return heap->heapinfo[block].busy_frag.frag_size[frag];
}
-
+
}
void mmcheck(xbt_mheap_t heap) {return;
int type; /* 0: busy large block
>0: busy fragmented (fragments of size 2^type bytes)
<0: free block */
-
+
union {
/* Heap information for a busy block. */
struct {
parmap->workers[0] = nullptr;
#if HAVE_PTHREAD_SETAFFINITY
int core_bind = 0;
-#endif
+#endif
for (unsigned int i = 1; i < num_workers; i++) {
xbt_parmap_thread_data_t data = xbt_new0(s_xbt_parmap_thread_data_t, 1);
data->parmap = parmap;
if (core_bind != xbt_os_get_numcores() - 1)
core_bind++;
else
- core_bind = 0;
+ core_bind = 0;
#endif
}
return parmap;
data->count=-1;
else
data->count=0;
- open_append2_file(data);
+ open_append2_file(data);
res->data = data;
return res;
}
xbt_cmdline = xbt_dynar_new(sizeof(char*), NULL);
for (int i = 0; i < *argc; i++)
xbt_dynar_push(xbt_cmdline,&(argv[i]));
-
+
xbt_log_init(argc, argv);
}
int errcode = pthread_key_create(&xbt_self_thread_key, NULL);
xbt_assert(errcode == 0, "pthread_key_create failed for xbt_self_thread_key");
-
+
main_thread = xbt_new(s_xbt_os_thread_t, 1);
main_thread->name = NULL;
main_thread->detached = 0;
res_thread->start_routine = start_routine;
res_thread->param = param;
res_thread->extra_data = extra_data;
-
+
int errcode = pthread_create(&(res_thread->t), &thread_attr, wrapper_start_routine, res_thread);
xbt_assert(errcode == 0, "pthread_create failed: %s", strerror(errcode));
XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test, "Messages specific for this msg example");
-static int master(int argc, char *argv[])
+static int master(int /*argc*/, char* /*argv*/ [])
{
double task_comp_size = 5E7;
double task_comm_size = 1E6;
return 0;
}
-static int worker_main(int argc, char *argv[])
+static int worker_main(int /*argc*/, char* /*argv*/ [])
{
msg_task_t task = (msg_task_t) MSG_process_get_data(MSG_process_self());
msg_error_t res;
return 0;
}
-static int worker(int argc, char *argv[])
+static int worker(int /*argc*/, char* /*argv*/ [])
{
while (1) {
msg_task_t task = NULL;
int count = sg_link_count();
XBT_INFO("Link count: %d", count);
qsort((void *)links, count, sizeof(SD_link_t), cmp_link);
-
+
for (int i=0; i < count; i++){
XBT_INFO("%s: latency = %.5f, bandwidth = %f", sg_link_name(links[i]),
sg_link_latency(links[i]), sg_link_bandwidth(links[i]));
/*
* intra communication test
* All2All
- *
+ *
* send 1 byte from all to all
* + 2 secs latency
* should be 8 (platform_4p_1switch.xml)
/*
* intra communication test
* independent communication
- *
- * 0 -> 1
+ *
+ * 0 -> 1
* 2 -> 3
* shared is only switch which is fat pipe
* should be 1 + 2 latency = 3
/*
* intra communication test 1
* scatter
- *
+ *
* start: 1 2 3 (each having 1/3 of the bandwidth)
* after 3 sec: 0 1 2 (having 1/2 of the bandwidth)
* after another 2 sec: 0 0 1 (having all the bandwidth)
/*
* simple latency test
- * send one byte from 0 to 1
- *
+ * send one byte from 0 to 1
+ *
* this is a test for multiple platforms
* see tesh file for expected output
*/
/*
* bw and latency test 2
- * send 2 x 1 byte from 2 task in same direction 0 -> 1
+ * send 2 x 1 byte from 2 task in same direction 0 -> 1
*/
int main(int argc, char **argv)
/**
* bw and latency test 3
* same intention as test 2
- * sending 2 x 1 bytes at the same time
+ * sending 2 x 1 bytes at the same time
* this time in opposite direction
*/
#define TASK_NUM 3
/**
- * 3 tasks send 1 byte in parallel
+ * 3 tasks send 1 byte in parallel
* 3 flows exceed bandwidth
* should be 10001.5
* because the max tcp win size is 20000
* under the terms of the license (GNU LGPL) which comes with this package. */
/* Bug report: https://gforge.inria.fr/tracker/index.php?func=detail&aid=17132&group_id=12&atid=165 */
-
+
#include "xbt/log.h"
#include <stdio.h>
#include <mpi.h>
int count = 2;
int* sb = (int *) xbt_malloc(count * sizeof(int));
int* rb = (int *) xbt_malloc(count * size * sizeof(int));
-
+
for (int i = 0; i < count; ++i)
sb[i] = rank * count + i;
for (int i = 0; i < count * size; ++i)
for (i = 0; i < recv_counts[rank]; ++i)
sb[i] = recv_disps[rank] + i;
- for (i = 0; i < recv_sb_size; ++i)
+ for (i = 0; i < recv_sb_size; ++i)
rb[i] = -1;
printf("[%d] sndbuf=[", rank);
mult = size;
int* sb = (int *) xbt_malloc(size *maxlen * sizeof(int));
int* rb = (int *) xbt_malloc(size *maxlen * sizeof(int));
-
+
for (i = 0; i < size *maxlen; ++i) {
sb[i] = rank*size + i;
rb[i] = 0;
int* sb = (int *) xbt_malloc(size * sizeof(int) * 2);
int* rb = (int *) xbt_malloc(size * sizeof(int) * 2);
-
+
for (i = 0; i < size; ++i) {
sb[i] = rank*size + i;
rb[i] = 0;
print_buffer_int( rbuf, size*size, strdup("rbuf:"),rank);
- MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Barrier(MPI_COMM_WORLD);
if (0 == rank) {
printf("Alltoallv TEST COMPLETE.\n");
}
xbt_free(values);
count = 4096;
- values = (int *) xbt_malloc(count * sizeof(int));
+ values = (int *) xbt_malloc(count * sizeof(int));
for (i = 0; i < count; i++)
values[i] = (size -1 == rank) ? 17 : 3;
int count = 2;
int* sb = (int *) xbt_malloc(count * sizeof(int));
int* rb = (int *) xbt_malloc(count * size * sizeof(int));
-
+
for (int i = 0; i < count; ++i)
sb[i] = rank * count + i;
for (int i = 0; i < count * size; ++i)
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
-/*
+/*
* Test of reduce scatter.
* Each processor contributes its rank + the index to the reduction, then receives the ith sum
* Can be called with any number of processors.
MPI_Comm_size( comm, &size );
MPI_Comm_rank( comm, &rank );
int* sendbuf = (int *) malloc( size * sizeof(int) );
- for (i=0; i<size; i++)
+ for (i=0; i<size; i++)
sendbuf[i] = rank + i;
int* recvcounts = (int*) malloc (size * sizeof(int));
int* recvbuf = (int*) malloc (size * sizeof(int));
- for (i=0; i<size; i++)
+ for (i=0; i<size; i++)
recvcounts[i] = 1;
MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm );
int sumval = size * rank + ((size - 1) * size)/2;
}
else if (rank == 0)
{
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf0, buf_size, MPI_INT,
+ MPI_Recv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
else if (rank == 1)
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf0, buf_size, MPI_INT,
+ MPI_Recv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
else if (rank == 1)
}
else if (rank == 0)
{
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf0, buf_size, MPI_INT,
+ MPI_Recv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
else if (rank == 1)
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
//sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
//sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
// sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
// sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
// sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
{
//sleep (60);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);
MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Recv (buf1, buf_size, MPI_INT,
+ MPI_Recv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
MPI_Recv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
{
memset (buf0, 0, buf_size);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Wait (&req, &status);
{
memset (buf1, 1, buf_size);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Wait (&req, &status);
{
memset (buf0, 0, buf_size);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
printf("Proc 0: Request number - %p\n",req);
-
+
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
MPI_Recv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
{
memset (buf1, 1, buf_size);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
printf("Proc 1: Request number - %p\n",req);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitany (2, reqs, &done, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 2)
{
- // sleep (60);
+ // sleep (60);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
}
}
else {
MPI_Comm_dup (MPI_COMM_WORLD, &comm);
-
+
if (rank == 0) {
memset (buf0, 0, buf_size);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status);
-
+
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm);
}
else if (rank == 1) {
}
else {
MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &comm);
-
+
if (comm != MPI_COMM_NULL) {
MPI_Comm_size (comm, &dnprocs);
MPI_Comm_rank (comm, &drank);
}
else {
/* create the graph on p.268 MPI: The Complete Reference... */
- MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ,
+ MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ,
graph_index, graph_edges, 1, &comm);
if (comm != MPI_COMM_NULL) {
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
MPI_Comm_size (intercomm, &dnprocs);
MPI_Comm_rank (intercomm, &drank);
MPI_Comm_remote_size (intercomm, &rnprocs);
-
+
if (rnprocs > drank) {
if (rank % 2) {
memset (buf1, 1, buf_size);
}
else {
memset (buf0, 0, buf_size);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
-
+
if (intercomm == MPI_COMM_NULL) {
printf ("(%d) MPI_Intercomm_Create returned MPI_COMM_NULL\n", rank);
printf ("(%d) Aborting...\n", rank);
if (comm != MPI_COMM_NULL) {
MPI_Comm_size (comm, &dnprocs);
MPI_Comm_rank (comm, &drank);
-
+
if (dnprocs > 1) {
if (drank == 0) {
memset (buf0, 0, buf_size);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status);
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm);
printf ("(%d) is alive on %s\n", rank, processor_name);
fflush (stdout);
- MPI_Buffer_attach (bbuf, sizeof(int) *
+ MPI_Buffer_attach (bbuf, sizeof(int) *
(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
if (rank == 0) {
/* set up persistent sends... */
send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES;
- MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
}
if (rank == 0) {
/* set up transient sends... */
send_t_number = 0;
-
+
MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
-
+
MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
/* just to be paranoid */
MPI_Start (&aReq[2 * send_t_number + j]);
}
}
-
+
/* NOTE: Changing the send buffer of a Bsend is NOT an error... */
for (j = 0; j < NUM_SEND_TYPES; j++) {
/* muck the buffers */
outcount = 0;
while (!outcount) {
- MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
}
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
MPI_Buffer_detach (bbuf, &at_size);
- assert (at_size ==
+ assert (at_size ==
sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
MPI_Finalize ();
int namelen = 128;
int i, j, k, basic_extent;
int blocklens[4], displs[4];
- MPI_Datatype structtypes[4];
- MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype structtypes[4];
+ MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
MPI_Request aReq[TYPE_CONSTRUCTOR_COUNT];
MPI_Status aStatus[TYPE_CONSTRUCTOR_COUNT];
#ifdef RUN_TYPE_STRUCT
/* create the types */
MPI_Type_struct (2, blocklens, displs, structtypes, &newtype[0]);
-
+
MPI_Type_extent (newtype[0], &basic_extent);
if (basic_extent != sizeof (test_basic_struct_t)) {
fprintf (stderr, "(%d): Unexpect extent for struct\n");
}
MPI_Type_vector (2, 3, 4, newtype[0], &newtype[1]);
- MPI_Type_hvector (3, 2, 15 * sizeof (test_basic_struct_t),
+ MPI_Type_hvector (3, 2, 15 * sizeof (test_basic_struct_t),
newtype[1], &newtype[2]);
displs[1] = 2;
MPI_Type_indexed (2, blocklens, displs, newtype[2], &newtype[3]);
MPI_Type_struct (4, blocklens, displs, structtypes, &newtype[6]);
-#ifdef RUN_TYPE_STRUCT
+#ifdef RUN_TYPE_STRUCT
MPI_Type_commit (&newtype[0]);
#endif
MPI_Type_commit (&newtype[5]);
#endif
-#ifdef RUN_TYPE_STRUCT_LB_UB
+#ifdef RUN_TYPE_STRUCT_LB_UB
MPI_Type_commit (&newtype[6]);
#endif
#endif
#ifdef RUN_TYPE_STRUCT
- MPI_Isend (&(struct_lb_ub_send_buf[0].the_double_to_send),
+ MPI_Isend (&(struct_lb_ub_send_buf[0].the_double_to_send),
MSG_COUNT, newtype[6], 1, 6, comm, &aReq[6]);
#else
aReq[6] = MPI_REQUEST_NULL;
#endif
#ifdef RUN_TYPE_STRUCT_LB_UB
- MPI_Irecv (struct_lb_ub_recv_buf,
+ MPI_Irecv (struct_lb_ub_recv_buf,
MSG_COUNT, newtype[0], 0, 6, comm, &aReq[6]);
#else
aReq[6] = MPI_REQUEST_NULL;
#ifdef RUN_TYPE_HVECTOR
/* eight holes in hvector_buf... */
/* hole in first vector, first block... */
- assert ((hvector_buf[i*44 + 3].the_double == 2.0) &&
+ assert ((hvector_buf[i*44 + 3].the_double == 2.0) &&
(hvector_buf[i*44 + 3].the_char == 'b'));
/* hole in second vector, first block... */
assert ((hvector_buf[i*44 + 10].the_double == 2.0) &&
if (*dptr == MPI_BYTE)
stop = (*len)/(2 * sizeof(double));
- else
+ else
stop = *len;
for (i = 0; i < stop; i++) {
c.real = inout->real * in->real - inout->imag * in->imag;
c.imag = inout->real * in->imag + inout->imag * in->real;
*inout = c;
- in++;
+ in++;
inout++;
}
buf0 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf0);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf0[i] = rank;
#ifdef RUN_ALLTOALLV
if (rank == 0) {
buf1 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf1);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf1[i] = i;
displs = (int *) malloc (nprocs * sizeof(int));
#endif
#ifdef RUN_BARRIER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Barrier (comm);
#endif
#ifdef RUN_BCAST
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Bcast (buf0, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_GATHER
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gather (&buf0[rank*buf_size], buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gather (&buf0[rank*buf_size], buf_size,
MPI_INT, buf1, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_SCATTER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scatter (buf1, buf_size, MPI_INT, buf0, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_GATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gatherv (&buf0[rank*buf_size],
- (rank < buf_size) ? rank : buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gatherv (&buf0[rank*buf_size],
+ (rank < buf_size) ? rank : buf_size,
MPI_INT, buf1, rcounts, displs, MPI_INT, 0, comm);
#endif
#ifdef RUN_SCATTERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Scatterv (buf1, counts, displs, MPI_INT, buf0,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Scatterv (buf1, counts, displs, MPI_INT, buf0,
(rank < buf_size) ? rank : buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_REDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_MAX, 0, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_MIN, 0, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_SUM, 0, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_PROD, 0, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LAND, 0, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BAND, 0, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LOR, 0, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BOR, 0, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, 0, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, 0, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (a, answer, buf_size, ctype, user_op, 0, comm);
#endif
#endif
#ifdef RUN_ALLGATHER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allgather (buf0, buf_size, MPI_INT, buf1, buf_size, MPI_INT, comm);
#endif
#ifdef RUN_ALLTOALL
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Alltoall (buf1, buf_size, MPI_INT, buf0, buf_size, MPI_INT, comm);
#endif
#ifdef RUN_ALLGATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allgatherv (buf0,
- (rank < buf_size) ? rank : buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allgatherv (buf0,
+ (rank < buf_size) ? rank : buf_size,
MPI_INT, buf1, rcounts, displs, MPI_INT, comm);
#endif
#ifdef RUN_ALLTOALLV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_INT,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_INT,
buf0, alltoallvcounts, displs, MPI_INT, comm);
#endif
#ifdef RUN_ALLREDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (a, answer, buf_size, ctype, user_op, comm);
#endif
#endif
#ifdef RUN_REDUCE_SCATTER
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (a, answer, rcounts, ctype, user_op, comm);
#endif
#endif
#ifdef RUN_SCAN
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (a, answer, buf_size, ctype, user_op, comm);
#endif
#endif
int *ricounts, *rdcounts;
#ifdef RUN_BARRIER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Barrier (comm);
#endif
#ifdef RUN_BCAST
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Bcast (buf0, buf_size * sizeof(int), MPI_BYTE, 0, comm);
#endif
#ifdef RUN_GATHER
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gather (&buf0[rank*buf_size], buf_size * sizeof(int),
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gather (&buf0[rank*buf_size], buf_size * sizeof(int),
MPI_BYTE, buf1, buf_size * sizeof(int), MPI_BYTE, 0, comm);
#endif
#ifdef RUN_SCATTER
- for (i = 0; i < OP_COUNT; i++)
- MPI_Scatter (buf1, buf_size * sizeof(int), MPI_BYTE,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Scatter (buf1, buf_size * sizeof(int), MPI_BYTE,
buf0, buf_size * sizeof(int), MPI_BYTE, 0, comm);
#endif
#ifdef RUN_GATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gatherv (&buf0[rank*buf_size],
- ((rank < buf_size) ? rank : buf_size) * sizeof(int),
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gatherv (&buf0[rank*buf_size],
+ ((rank < buf_size) ? rank : buf_size) * sizeof(int),
MPI_BYTE, buf1, rcounts, displs, MPI_BYTE, 0, comm);
#endif
#ifdef RUN_SCATTERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Scatterv (buf1, counts, displs, MPI_BYTE, buf0,
- ((rank < buf_size) ? rank : buf_size) * sizeof(int),
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Scatterv (buf1, counts, displs, MPI_BYTE, buf0,
+ ((rank < buf_size) ? rank : buf_size) * sizeof(int),
MPI_BYTE, 0, comm);
#endif
#ifdef RUN_REDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MAX, 0, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MIN, 0, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_SUM, 0, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_PROD, 0, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LAND, 0, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
- MPI_Reduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Reduce (buf0, buf1,
buf_size * sizeof(int), MPI_BYTE, MPI_BAND, 0, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LOR, 0, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
- MPI_Reduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Reduce (buf0, buf1,
buf_size * sizeof(int), MPI_BYTE, MPI_BOR, 0, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LXOR, 0, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
- MPI_Reduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Reduce (buf0, buf1,
buf_size * sizeof(int), MPI_BYTE, MPI_BXOR, 0, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
- MPI_Reduce (a, answer,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Reduce (a, answer,
buf_size * 2 * sizeof(double), MPI_BYTE, user_op, 0, comm);
#endif
#endif
buf1 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf1);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf1[i] = i;
displs = (int *) malloc (nprocs * sizeof(int));
#endif
#ifdef RUN_ALLGATHER
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allgather (buf0, buf_size * sizeof(int), MPI_BYTE,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allgather (buf0, buf_size * sizeof(int), MPI_BYTE,
buf1, buf_size * sizeof(int), MPI_BYTE, comm);
#endif
#ifdef RUN_ALLTOALL
- for (i = 0; i < OP_COUNT; i++)
- MPI_Alltoall (buf1, buf_size * sizeof(int), MPI_BYTE,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Alltoall (buf1, buf_size * sizeof(int), MPI_BYTE,
buf0, buf_size * sizeof(int), MPI_BYTE, comm);
#endif
#ifdef RUN_ALLGATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allgatherv (buf0,
- ((rank < buf_size) ? rank : buf_size) * sizeof(int),
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allgatherv (buf0,
+ ((rank < buf_size) ? rank : buf_size) * sizeof(int),
MPI_BYTE, buf1, rcounts, displs, MPI_BYTE, comm);
#endif
#ifdef RUN_ALLTOALLV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_BYTE,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_BYTE,
buf0, alltoallvcounts, displs, MPI_BYTE, comm);
#endif
#ifdef RUN_ALLREDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allreduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allreduce (buf0, buf1,
buf_size * sizeof (int), MPI_BYTE, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allreduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allreduce (buf0, buf1,
buf_size * sizeof (int), MPI_BYTE, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allreduce (buf0, buf1,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allreduce (buf0, buf1,
buf_size * sizeof (int), MPI_BYTE, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allreduce (a, answer,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allreduce (a, answer,
buf_size * 2 * sizeof (double), MPI_BYTE, user_op, comm);
#endif
#endif
#ifdef RUN_REDUCE_SCATTER
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_BYTE, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_BYTE, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, ricounts, MPI_UNSIGNED, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_BYTE, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (a, answer, rdcounts, MPI_BYTE, user_op, comm);
#endif
#endif
#ifdef RUN_SCAN
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size * sizeof(int), MPI_BYTE, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size * sizeof(int), MPI_BYTE, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_UNSIGNED, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size * sizeof(int), MPI_BYTE, MPI_BXOR, comm);
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
- MPI_Scan (a, answer,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Scan (a, answer,
buf_size * 2 * sizeof(double), MPI_BYTE, user_op, comm);
#endif
#endif
c.real = inout->real * in->real - inout->imag * in->imag;
c.imag = inout->real * in->imag + inout->imag * in->real;
*inout = c;
- in++;
+ in++;
inout++;
}
buf0 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf0);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf0[i] = rank;
#ifdef RUN_ALLTOALLV
if (rank == 0) {
buf1 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf1);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf1[i] = i;
displs = (int *) malloc (nprocs * sizeof(int));
}
#ifdef RUN_BARRIER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Barrier (comm);
#endif
#ifdef RUN_BCAST
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Bcast (buf0, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_GATHER
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gather (&buf0[rank*buf_size], buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gather (&buf0[rank*buf_size], buf_size,
MPI_INT, buf1, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_SCATTER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scatter (buf1, buf_size, MPI_INT, buf0, buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_GATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Gatherv (&buf0[rank*buf_size],
- (rank < buf_size) ? rank : buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Gatherv (&buf0[rank*buf_size],
+ (rank < buf_size) ? rank : buf_size,
MPI_INT, buf1, rcounts, displs, MPI_INT, 0, comm);
#endif
#ifdef RUN_SCATTERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Scatterv (buf1, counts, displs, MPI_INT, buf0,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Scatterv (buf1, counts, displs, MPI_INT, buf0,
(rank < buf_size) ? rank : buf_size, MPI_INT, 0, comm);
#endif
#ifdef RUN_REDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_MAX, 0, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_MIN, 0, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_SUM, 0, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_PROD, 0, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LAND, 0, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BAND, 0, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LOR, 0, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BOR, 0, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, 0, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, 0, comm);
#endif
#ifdef RUN_MAXLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_2INT, MPI_MAXLOC, 0, comm);
else
fprintf (stderr, "Not enough tasks for MAXLOC test\n");
#ifdef RUN_MINLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (buf0, buf1, buf_size, MPI_2INT, MPI_MINLOC, 0, comm);
else
fprintf (stderr, "Not enough tasks for MINLOC test\n");
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce (a, answer, buf_size, ctype, user_op, 0, comm);
#endif
#endif
if (rank != 0) {
buf1 = (int *) malloc (buf_size * nprocs * sizeof(int));
assert (buf1);
- for (i = 0; i < buf_size * nprocs; i++)
+ for (i = 0; i < buf_size * nprocs; i++)
buf1[i] = i;
displs = (int *) malloc (nprocs * sizeof(int));
}
#ifdef RUN_ALLGATHER
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allgather (buf0, buf_size, MPI_INT, buf1, buf_size, MPI_INT, comm);
#endif
#ifdef RUN_ALLTOALL
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Alltoall (buf1, buf_size, MPI_INT, buf0, buf_size, MPI_INT, comm);
#endif
#ifdef RUN_ALLGATHERV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Allgatherv (buf0,
- (rank < buf_size) ? rank : buf_size,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Allgatherv (buf0,
+ (rank < buf_size) ? rank : buf_size,
MPI_INT, buf1, rcounts, displs, MPI_INT, comm);
#endif
#ifdef RUN_ALLTOALLV
- for (i = 0; i < OP_COUNT; i++)
- MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_INT,
+ for (i = 0; i < OP_COUNT; i++)
+ MPI_Alltoallv (buf1, alltoallvcounts, displs, MPI_INT,
buf0, alltoallvcounts, displs, MPI_INT, comm);
#endif
#ifdef RUN_ALLREDUCE
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_MAXLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_2INT, MPI_MAXLOC, comm);
else
fprintf (stderr, "Not enough tasks for MAXLOC test\n");
#ifdef RUN_MINLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (buf0, buf1, buf_size, MPI_2INT, MPI_MINLOC, comm);
else
fprintf (stderr, "Not enough tasks for MINLOC test\n");
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Allreduce (a, answer, buf_size, ctype, user_op, comm);
#endif
#endif
#ifdef RUN_REDUCE_SCATTER
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_MAXLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_2INT, MPI_MAXLOC, comm);
else
fprintf (stderr, "Not enough tasks for MAXLOC test\n");
#ifdef RUN_MINLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (buf0, buf1, rcounts, MPI_2INT, MPI_MINLOC, comm);
else
fprintf (stderr, "Not enough tasks for MINLOC test\n");
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Reduce_scatter (a, answer, rcounts, ctype, user_op, comm);
#endif
#endif
#ifdef RUN_SCAN
#ifdef RUN_MAX
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_MAX, comm);
#endif
#ifdef RUN_MIN
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_MIN, comm);
#endif
#ifdef RUN_SUM
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_SUM, comm);
#endif
#ifdef RUN_PROD
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_PROD, comm);
#endif
#ifdef RUN_LAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LAND, comm);
#endif
#ifdef RUN_BAND
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BAND, comm);
#endif
#ifdef RUN_LOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LOR, comm);
#endif
#ifdef RUN_BOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BOR, comm);
#endif
#ifdef RUN_LXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_LXOR, comm);
#endif
#ifdef RUN_BXOR
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_INT, MPI_BXOR, comm);
#endif
#ifdef RUN_MAXLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_2INT, MPI_MAXLOC, comm);
else
fprintf (stderr, "Not enough tasks for MAXLOC test\n");
#ifdef RUN_MINLOC
if (nprocs > 1)
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (buf0, buf1, buf_size, MPI_2INT, MPI_MINLOC, comm);
else
fprintf (stderr, "Not enough tasks for MINLOC test\n");
#endif
#ifdef RUN_USEROP
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Scan (a, answer, buf_size, ctype, user_op, comm);
#endif
#endif
MPI_Barrier (comm);
- for (i = 0; i < ERRHANDLER_COUNT; i++)
+ for (i = 0; i < ERRHANDLER_COUNT; i++)
MPI_Errhandler_create (myErrhandler, &newerrhandler[i]);
- for (i = 0; i < ERRHANDLER_COUNT; i++)
+ for (i = 0; i < ERRHANDLER_COUNT; i++)
MPI_Errhandler_free (&newerrhandler[i]);
MPI_Barrier (comm);
/* now with an alias... */
- for (i = 0; i < ERRHANDLER_COUNT; i++)
+ for (i = 0; i < ERRHANDLER_COUNT; i++)
MPI_Errhandler_create (myErrhandler, &newerrhandler[i]);
for (i = 0; i < ERRHANDLER_COUNT; i++) {
MPI_Barrier (comm);
- for (i = 0; i < ERRHANDLER_COUNT; i++)
+ for (i = 0; i < ERRHANDLER_COUNT; i++)
MPI_Errhandler_create (myErrhandler, &newerrhandler[i]);
MPI_Barrier (comm);
int namelen = 128;
int i;
int ranks[2], ranges[1][3];
- MPI_Group newgroup[GROUP_CONSTRUCTOR_COUNT];
- MPI_Group newgroup2[GROUP_CONSTRUCTOR_COUNT];
+ MPI_Group newgroup[GROUP_CONSTRUCTOR_COUNT];
+ MPI_Group newgroup2[GROUP_CONSTRUCTOR_COUNT];
MPI_Comm temp;
MPI_Comm intercomm = MPI_COMM_NULL;
MPI_Comm_group (MPI_COMM_WORLD, &newgroup[0]);
if (GROUP_CONSTRUCTOR_COUNT > 1)
- MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
+ MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
if (GROUP_CONSTRUCTOR_COUNT > 2)
MPI_Group_excl (newgroup[0], 2, ranks, &newgroup[2]);
if (GROUP_CONSTRUCTOR_COUNT > 3)
- MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
+ MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
if (GROUP_CONSTRUCTOR_COUNT > 4)
- MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
+ MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
if (GROUP_CONSTRUCTOR_COUNT > 5)
MPI_Group_union (newgroup[1], newgroup[3], &newgroup[5]);
MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
}
-
+
for (i = 0; i < GROUP_CONSTRUCTOR_COUNT; i++)
MPI_Group_free (&newgroup[i]);
MPI_Comm_group (MPI_COMM_WORLD, &newgroup[0]);
if (GROUP_CONSTRUCTOR_COUNT > 1)
- MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
+ MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
if (GROUP_CONSTRUCTOR_COUNT > 2)
MPI_Group_excl (newgroup[0], 2, ranks, &newgroup[2]);
if (GROUP_CONSTRUCTOR_COUNT > 3)
- MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
+ MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
if (GROUP_CONSTRUCTOR_COUNT > 4)
- MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
+ MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
if (GROUP_CONSTRUCTOR_COUNT > 5)
MPI_Group_union (newgroup[1], newgroup[3], &newgroup[5]);
MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
}
-
+
for (i = 0; i < GROUP_CONSTRUCTOR_COUNT; i++) {
newgroup2[i] = newgroup[i];
MPI_Group_free (&newgroup2[i]);
int namelen = 128;
int i;
int ranks[2], ranges[1][3];
- MPI_Group newgroup[GROUP_CONSTRUCTOR_COUNT];
- MPI_Group newgroup2[GROUP_CONSTRUCTOR_COUNT];
+ MPI_Group newgroup[GROUP_CONSTRUCTOR_COUNT];
+ MPI_Group newgroup2[GROUP_CONSTRUCTOR_COUNT];
MPI_Comm temp;
MPI_Comm intercomm = MPI_COMM_NULL;
MPI_Comm_group (MPI_COMM_WORLD, &newgroup[0]);
if (GROUP_CONSTRUCTOR_COUNT > 1)
- MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
+ MPI_Group_incl (newgroup[0], 2, ranks, &newgroup[1]);
if (GROUP_CONSTRUCTOR_COUNT > 2)
MPI_Group_excl (newgroup[0], 2, ranks, &newgroup[2]);
if (GROUP_CONSTRUCTOR_COUNT > 3)
- MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
+ MPI_Group_range_incl (newgroup[0], 1, ranges, &newgroup[3]);
if (GROUP_CONSTRUCTOR_COUNT > 4)
- MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
+ MPI_Group_range_excl (newgroup[0], 1, ranges, &newgroup[4]);
if (GROUP_CONSTRUCTOR_COUNT > 5)
MPI_Group_union (newgroup[1], newgroup[3], &newgroup[5]);
MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
}
- }
+ }
MPI_Barrier (comm);
-/* -*- C -*-
+/* -*- C -*-
@PROLOGUE@
- -----
+ -----
Jeffrey Vetter vetter@llnl.gov
Center for Applied Scientific Computing, LLNL
}
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
MPI_Comm_size (intercomm, &dnprocs);
MPI_Comm_rank (intercomm, &drank);
MPI_Comm_remote_size (intercomm, &rnprocs);
-
+
if (rnprocs > drank) {
if (rank % 2) {
memset (buf1, 1, buf_size);
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
}
}
rleader = ((rank + nprocs) % 2) ? nprocs - 2 : nprocs - 1;
if ((trank == 0) && (rank % 2)) {
- MPI_Recv (buf0, buf_size, MPI_INT,
+ MPI_Recv (buf0, buf_size, MPI_INT,
rleader, 0, MPI_COMM_WORLD, &status);
}
}
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
MPI_Comm_size (intercomm, &dnprocs);
MPI_Comm_rank (intercomm, &drank);
MPI_Comm_remote_size (intercomm, &rnprocs);
-
+
if (rnprocs > drank) {
if (rank % 2) {
memset (buf1, 1, buf_size);
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
}
}
}
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
MPI_Comm_size (intercomm, &dnprocs);
MPI_Comm_rank (intercomm, &drank);
MPI_Comm_remote_size (intercomm, &rnprocs);
-
+
if (rnprocs > drank) {
if (rank % 2) {
memset (buf1, 1, buf_size);
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
}
}
}
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
MPI_Comm_size (intercomm, &dnprocs);
MPI_Comm_rank (intercomm, &drank);
MPI_Comm_remote_size (intercomm, &rnprocs);
-
+
if (rnprocs > drank) {
if (rank % 2) {
memset (buf1, 1, buf_size);
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
}
}
/* need lots of stuff for this constructor... */
MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &temp);
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
- nprocs -
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ nprocs -
((rank % 2) ? 2 - (nprocs % 2) : 1 + (nprocs % 2)),
INTERCOMM_CREATE_TAG, &intercomm);
-
+
MPI_Comm_free (&intercomm);
MPI_Comm_free (&temp);
MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, rleader,
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_free (&temp);
-
+
if (intercomm != MPI_COMM_NULL) {
/* need to make a different split communicator temporarily... */
- MPI_Comm_split (MPI_COMM_WORLD,
+ MPI_Comm_split (MPI_COMM_WORLD,
rank < nprocs/2, nprocs - rank, &temp);
if (temp != MPI_COMM_NULL) {
MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, rleader,
INTERCOMM_CREATE_TAG, &intercomm2);
MPI_Comm_free (&temp);
-
+
if (intercomm2 != MPI_COMM_NULL) {
if (rank < nprocs/2) {
MPI_Intercomm_merge (intercomm2, rank < nprocs/2, &comm2);
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status);
}
}
memset (buf0, 0, buf_size);
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm2);
-
+
MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm2, &status);
}
}
int src = (rank == 0) ? (nprocs - 1) : (rank - 1);
memset (buf0, rank, buf_size);
memset (buf1, rank, buf_size);
-
+
for (i = 0; i < req_count; i++) {
MPI_Irecv (buf0, buf_size, MPI_INT, src, 0, MPI_COMM_WORLD, &req1);
MPI_Isend (buf1, buf_size, MPI_INT, dest, 0, MPI_COMM_WORLD, &req2);
int done;
rc = MPI_Init(&argc,&argv);
- if (rc != MPI_SUCCESS)
+ if (rc != MPI_SUCCESS)
{
printf ("Error starting MPI program. Terminating.\n");
MPI_Abort(MPI_COMM_WORLD, rc);
MPI_Get_processor_name (processor_name, &namelen);
printf ("(%d) is alive on %s\n", rank, processor_name);
fflush (stdout);
-
+
/* Must be multiple of two for this test */
if ((numtasks & 1) != 0)
{
}
else if (split_rank == 1)
{
- rc = MPI_Irecv (&recv_int, 1, MPI_INT, 0, 0, split_comm,
+ rc = MPI_Irecv (&recv_int, 1, MPI_INT, 0, 0, split_comm,
&request);
if (rc != MPI_SUCCESS)
{
memset (buf, 1, buf_size * REQS_PER_ITERATION);
for (j = 0; j < REQS_PER_ITERATION; j++) {
- MPI_Isend (&buf[j*buf_size], buf_size, MPI_INT,
+ MPI_Isend (&buf[j*buf_size], buf_size, MPI_INT,
1, j, MPI_COMM_WORLD, &req[j]);
}
memset (buf, 2, buf_size * REQS_PER_ITERATION);
for (j = 0; j < REQS_PER_ITERATION; j++) {
- MPI_Irecv (&buf[j*buf_size], buf_size, MPI_INT,
+ MPI_Irecv (&buf[j*buf_size], buf_size, MPI_INT,
0, j, MPI_COMM_WORLD, &req[j]);
}
else {
if (rank == 0) {
for (i = 1; i < nprocs; i++) {
- MPI_Recv (buf, buf_size, MPI_INT,
+ MPI_Recv (buf, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
}
}
#define DCOMM_CALL_COUNT 7 /* MPI_Cart_create; MPI_Cart_sub;
MPI_Comm_create; MPI_Comm_dup;
MPI_Comm_split; MPI_Graph_create;
- and MPI_Intercomm_merge; store
+ and MPI_Intercomm_merge; store
MPI_Intercomm_create separately... */
#define TWOD 2
#define GRAPH_SZ 4
temp = dcomms[2];
}
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
if (DCOMM_CALL_COUNT > 4) {
#ifdef RUN_GRAPH_CREATE
/* create the graph on p.268 MPI: The Complete Reference... */
- MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ,
+ MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ,
graph_index, graph_edges, 1, &dcomms[4]);
#else
dcomms[4] = MPI_COMM_NULL;
/* create an intercommunicator and merge it... */
if (rank % 3) {
#ifndef RUN_INTERCOMM_CREATE
- MPI_Intercomm_create (dcomms[2], 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (dcomms[2], 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
}
else {
dcomms[6] = MPI_COMM_NULL;
- }
+ }
#ifndef RUN_INTERCOMM_CREATE
#ifndef RUN_COMM_SPLIT
if (dcomms[2] != MPI_COMM_NULL)
if (dnprocs[i] > 1) {
if (drank[i] == 0) {
for (j = 1; j < dnprocs[i]; j++) {
- MPI_Recv (buf, buf_size, MPI_INT,
+ MPI_Recv (buf, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, dcomms[i], &status);
}
}
/* do any source receives on the intercomm... */
if ((rank % 3) == 1) {
for (j = 0; j < intersize; j++) {
- MPI_Recv (buf, buf_size, MPI_INT,
+ MPI_Recv (buf, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, intercomm, &status);
}
}
for (i = rank; i < buf_size; i = i + 2)
buf[i] = i;
- MPI_Isend (&buf[rank], 1,
+ MPI_Isend (&buf[rank], 1,
strided_type, (rank + 1) % 2, 0, comm, &req);
- MPI_Recv (&buf[(rank + 1) % 2], 1,
+ MPI_Recv (&buf[(rank + 1) % 2], 1,
strided_type, (rank + 1) % 2, 0, comm, &status);
MPI_Wait (&req, &status);
MPI_Start (&aReq[j]);
}
}
-
+
/* complete the sends */
switch (k/2) {
case 0:
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
/* complete all of the receives... */
MPI_Start (&aReq[j]);
}
}
-
+
/* complete the sends */
if (k < 2) {
/* use MPI_Test */
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
/* complete all of the receives... */
MPI_Start (&aReq[j]);
}
}
-
+
/* complete the sends */
if (k < 2) {
/* use MPI_Testany */
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
/* complete all of the receives... */
MPI_Start (&aReq[j]);
}
}
-
+
/* complete the sends */
if (k < 2) {
/* use MPI_Waitany */
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
/* complete all of the receives... */
MPI_Start (&aReq[j]);
}
}
-
+
/* complete the sends */
if (k < 2)
/* use MPI_Wait */
for (j = 0; j < 2; j++)
MPI_Wait (&aReq[j], &aStatus[j]);
- else
+ else
/* use MPI_Waitall */
MPI_Waitall (2, aReq, aStatus);
}
else if (rank == 1) {
/* set up receives for all of the sends */
for (j = 0; j < 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
+ MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
MPI_INT, 0, j, comm, &aReq[j]);
}
/* complete all of the receives... */
{
for (j = 0; j < 2; j++) {
MPI_Probe (MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
-
+
if (status.MPI_SOURCE == 0)
MPI_Recv (&i, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
- else
+ else
MPI_Recv (&x, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &status);
}
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
MPI_Wait (&req0, &status);
}
- else if (rank == 1)
+ else if (rank == 1)
{
for (j = 0; j < 2; j++) {
MPI_Probe (0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
-
+
if (status.MPI_TAG == 0)
MPI_Recv (&i, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
else
MPI_Recv (&x, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &status);
}
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
MPI_Send (&i, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
MPI_Probe (1, 0, MPI_COMM_WORLD, &status);
-
+
MPI_Recv (&x, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &status);
}
else if (rank == 1)
MPI_Send (&x, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
MPI_Irecv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[1]);
- while (!flag)
+ while (!flag)
MPI_Testall (2, reqs, &flag, statuses);
MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD);
MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]);
- while (!flag)
+ while (!flag)
MPI_Testall (2, reqs, &flag, statuses);
MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
for (i = 1; i < buf_size; i = i + 2)
buf[i] = i;
- MPI_Recv (&buf[(rank + 1) % 2], 1,
+ MPI_Recv (&buf[(rank + 1) % 2], 1,
strided_type, (rank + 1) % 2, 0, comm, &status);
for (i = 0; i < buf_size; i++)
{
memset (buf0, 0, buf_size);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
{
memset (buf1, 1, buf_size);
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Wait (&req, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &req);
MPI_Recv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);
{
memset (buf0, 0, buf_size);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Send (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD);
}
else if (rank == 1)
{
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);
MPI_Recv (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 2)
{
- //sleep (60);
+ //sleep (60);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
}
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
{
MPI_Request reqs[3];
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 2, MPI_COMM_WORLD, &reqs[1]);
- MPI_Irecv (buf2, buf_size, MPI_INT,
+ MPI_Irecv (buf2, buf_size, MPI_INT,
MPI_ANY_SOURCE, 3, MPI_COMM_WORLD, &reqs[2]);
for (i = 3; i > 0; i--) {
memset (buf0, 1, buf_size);
for (i = 3; i > 0; i--) {
- MPI_Recv (&flipbit, 1, MPI_INT,
+ MPI_Recv (&flipbit, 1, MPI_INT,
MPI_ANY_SOURCE, i, MPI_COMM_WORLD, &status);
MPI_Send (buf0, buf_size, MPI_INT, 0, i, MPI_COMM_WORLD);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitany (2, reqs, &done, &status);
}
else if (rank == 0)
{
- MPI_Irecv (buf0, buf_size, MPI_INT,
+ MPI_Irecv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);
- MPI_Irecv (buf1, buf_size, MPI_INT,
+ MPI_Irecv (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
}
else if (rank == 2)
{
- sleep (60);
+ sleep (60);
MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
}
else if (rank == 1) {
memset (buf1, 1, buf_size);
- MPI_Recv (buf0, buf_size, MPI_INT,
+ MPI_Recv (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
-
+
MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
memset (buf1, 1, buf_size);
MPI_Recv (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
-
+
MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
}
memset (buf0, 0, buf_size);
MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]);
- MPI_Recv_init (buf1, buf_size, MPI_INT,
+ MPI_Recv_init (buf1, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &aReq[1]);
MPI_Start (&aReq[0]);
else if (rank == 1) {
memset (buf1, 1, buf_size);
- MPI_Recv_init (buf0, buf_size, MPI_INT,
+ MPI_Recv_init (buf0, buf_size, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &aReq[0]);
MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]);
double real, imag;
} Complex;
-void
-myProd (void *inp, void *inoutp, int *len, MPI_Datatype *dptr)
+void
+myProd (void *inp, void *inoutp, int *len, MPI_Datatype *dptr)
{
int i;
Complex c;
MPI_Barrier (comm);
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Op_create (myProd, 1, &newop[i]);
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Op_free (&newop[i]);
MPI_Barrier (comm);
/* now with an alias... */
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Op_create (myProd, 1, &newop[i]);
for (i = 0; i < OP_COUNT; i++) {
double real, imag;
} Complex;
-void
-myProd (void *inp, void *inoutp, int *len, MPI_Datatype *dptr)
+void
+myProd (void *inp, void *inoutp, int *len, MPI_Datatype *dptr)
{
int i;
Complex c;
MPI_Barrier (comm);
- for (i = 0; i < OP_COUNT; i++)
+ for (i = 0; i < OP_COUNT; i++)
MPI_Op_create (myProd, 1, &newop[i]);
MPI_Barrier (comm);
printf ("(%d) is alive on %s\n", rank, processor_name);
fflush (stdout);
- MPI_Buffer_attach (bbuf, sizeof(int) *
+ MPI_Buffer_attach (bbuf, sizeof(int) *
(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
if (rank == 0) {
/* set up persistent sends... */
send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES;
- MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
}
if ((m == 1) && (rank == 1)) {
/* set up the persistent receives... */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
- MPI_Recv_init (&buf[j * BUF_SIZE],
- BUF_SIZE + (j % 2) * SLOP,
+ MPI_Recv_init (&buf[j * BUF_SIZE],
+ BUF_SIZE + (j % 2) * SLOP,
MPI_INT, 0, j, comm, &aReq[j]);
}
}
if (rank == 0) {
/* set up transient sends... */
send_t_number = 0;
-
+
MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
-
+
MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
/* just to be paranoid */
send_t_number++;
- assert (send_t_number ==
+ assert (send_t_number ==
NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES);
/* start the persistent sends... */
if (k % 2) {
- MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2,
+ MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2,
&aReq[2 * send_t_number]);
}
else {
MPI_Wait (&aReq[j], &aStatus[j]);
}
break;
-
+
case 1:
/* use MPI_Waitall */
MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus);
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus);
}
-
+
break;
case 3:
/* use MPI_Waitsome */
total = 0;
while (total < NUM_SEND_TYPES * 2) {
- MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
total += outcount;
/* use MPI_Test */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
-
+
while (!flag) {
MPI_Test (&aReq[j], &flag, &aStatus[j]);
}
}
-
+
break;
case 5:
while (!flag) {
MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus);
}
-
+
break;
case 6:
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
while (!flag) {
- MPI_Testany (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testany (NUM_SEND_TYPES * 2, aReq,
&index, &flag, aStatus);
}
}
outcount = 0;
while (!outcount) {
- MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
}
/* start receives for all of the sends */
if (m == 0) {
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
- MPI_Irecv (&buf[j * BUF_SIZE],
- BUF_SIZE + (j % 2) * SLOP,
+ MPI_Irecv (&buf[j * BUF_SIZE],
+ BUF_SIZE + (j % 2) * SLOP,
MPI_INT, 0, j, comm, &aReq[j]);
}
}
/* complete all of the receives... */
switch (l/2) {
- case 0:
+ case 0:
/* use MPI_Wait */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Wait (&aReq[j], &aStatus[j]);
}
break;
-
+
case 1:
/* use MPI_Waitall */
MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus);
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus);
}
-
+
break;
case 3:
/* use MPI_Waitsome */
total = 0;
while (total < NUM_SEND_TYPES * 2) {
- MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
total += outcount;
}
break;
-
+
case 4:
/* use MPI_Test */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
-
+
while (!flag) {
MPI_Test (&aReq[j], &flag, &aStatus[j]);
}
}
-
+
break;
-
+
case 5:
/* use MPI_Testall */
flag = 0;
while (!flag) {
MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus);
}
-
+
break;
-
+
case 6:
/* use MPI_Testany */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
while (!flag) {
- MPI_Testany (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testany (NUM_SEND_TYPES * 2, aReq,
&index, &flag, aStatus);
}
}
-
+
break;
-
+
case 7:
/* use MPI_Testsome */
total = 0;
outcount = 0;
while (!outcount) {
- MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
}
-
+
total += outcount;
}
-
+
break;
-
+
default:
assert (0);
break;
MPI_Buffer_detach (bbuf, &at_size);
- assert (at_size ==
+ assert (at_size ==
sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
MPI_Finalize ();
MPI_Testall (2, aReq, &flag, aStatus);
}
}
- }
+ }
else {
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
}
}
}
- }
+ }
else {
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
total += outcount;
}
}
- }
+ }
else {
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
/* use MPI_Test */
for (j = 0; j < 2; j++) {
flag = 0;
-
+
while (!flag) {
MPI_Test (&aReq[j], &flag, &aStatus[j]);
}
}
}
- }
+ }
else {
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
MPI_Aint basic_extent;
int blocklens[3];
MPI_Aint displs[3];
- MPI_Datatype structtypes[3];
- MPI_Datatype newtype[2];
+ MPI_Datatype structtypes[3];
+ MPI_Datatype newtype[2];
MPI_Request aReq[2];
MPI_Status aStatus[2];
test_small_struct_t small_struct_buf[SMALL_SIZE];
blocklens[0] = blocklens[1] = blocklens[2] = 1;
displs[0] = 0;
displs[1] = sizeof(double);
- displs[2] =
- ((void *) &(big_struct_buf[0].the_other_double)) -
+ displs[2] =
+ ((void *) &(big_struct_buf[0].the_other_double)) -
((void *) big_struct_buf);
if (displs[2] < 0) displs[2] = -displs[2];
/* create the types */
MPI_Type_struct (2, blocklens, displs, structtypes, &newtype[0]);
MPI_Type_struct (3, blocklens, displs, structtypes, &newtype[1]);
-
+
MPI_Type_extent (newtype[0], &basic_extent);
if (basic_extent != sizeof (test_small_struct_t)) {
fprintf (stderr, "(%d): Unexpected extent for small struct\n", rank);
MPI_Abort (MPI_COMM_WORLD, 666);
}
-
+
MPI_Type_extent (newtype[1], &basic_extent);
if (basic_extent != sizeof (test_big_struct_t)) {
fprintf (stderr, "(%d): Unexpected extent for big struct\n", rank);
{
for (j = 0; j < 2; j++) {
MPI_Probe (MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
-
+
if (status.MPI_SOURCE == 0) {
- MPI_Recv (&i, 1, MPI_INT,
+ MPI_Recv (&i, 1, MPI_INT,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
if (status.MPI_SOURCE != 0)
printf ("(%d) Type mismatch from matching other message\n", rank);
}
else {
- MPI_Recv (&x, 1, MPI_DOUBLE,
+ MPI_Recv (&x, 1, MPI_DOUBLE,
MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
if (status.MPI_SOURCE == 0)
printf ("(%d) Type mismatch from matching other message\n", rank);
}
}
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
MPI_Wait (&req0, &status);
}
- else if (rank == 1)
+ else if (rank == 1)
{
for (j = 0; j < 2; j++) {
MPI_Probe (0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
-
+
if (status.MPI_TAG == 0) {
MPI_Recv (&i, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
printf ("(%d) Type mismatch from matching other message\n", rank);
}
else {
- MPI_Recv (&x, 1, MPI_DOUBLE, 0,
+ MPI_Recv (&x, 1, MPI_DOUBLE, 0,
MPI_ANY_TAG, MPI_COMM_WORLD, &status);
if (status.MPI_TAG == 0)
}
}
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
i = 0;
MPI_Probe (1, 0, MPI_COMM_WORLD, &status);
-
+
MPI_Send (&i, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
MPI_Recv (&x, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &status);
MPI_Send (&x, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
-
+
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
printf ("(%d) is alive on %s\n", rank, processor_name);
fflush (stdout);
- MPI_Buffer_attach (bbuf, sizeof(int) *
+ MPI_Buffer_attach (bbuf, sizeof(int) *
(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
if (rank == 0) {
/* set up persistent sends... */
send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES;
- MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
- MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
+ MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
- MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
}
if ((m == 1) && (rank == 1)) {
/* set up the persistent receives... */
for (j = 0; j < NUM_SEND_TYPES * 2; j+=2) {
- MPI_Recv_init (&buf[j * BUF_SIZE],
+ MPI_Recv_init (&buf[j * BUF_SIZE],
BUF_SIZE, MPI_INT, 0, j, comm, &aReq[j]);
- MPI_Recv_init (&buf[(j + 1) * BUF_SIZE],
- BUF_SIZE * sizeof(int),
+ MPI_Recv_init (&buf[(j + 1) * BUF_SIZE],
+ BUF_SIZE * sizeof(int),
MPI_BYTE, 0, j + 1, comm, &aReq[j + 1]);
}
}
if (rank == 0) {
/* set up transient sends... */
send_t_number = 0;
-
+
MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
/* Barrier to ensure receives are posted for rsends... */
MPI_Barrier(MPI_COMM_WORLD);
-
+
MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
send_t_number++;
MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
- BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
+ BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1,
comm, &aReq[send_t_number * 2 + 1]);
/* just to be paranoid */
send_t_number++;
- assert (send_t_number ==
+ assert (send_t_number ==
NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES);
/* start the persistent sends... */
if (k % 2) {
- MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2,
+ MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2,
&aReq[2 * send_t_number]);
}
else {
MPI_Wait (&aReq[j], &aStatus[j]);
}
break;
-
+
case 1:
/* use MPI_Waitall */
MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus);
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus);
}
-
+
break;
case 3:
/* use MPI_Waitsome */
total = 0;
while (total < NUM_SEND_TYPES * 2) {
- MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
total += outcount;
/* use MPI_Test */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
-
+
while (!flag) {
MPI_Test (&aReq[j], &flag, &aStatus[j]);
}
}
-
+
break;
case 5:
while (!flag) {
MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus);
}
-
+
break;
case 6:
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
while (!flag) {
- MPI_Testany (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testany (NUM_SEND_TYPES * 2, aReq,
&index, &flag, aStatus);
}
}
outcount = 0;
while (!outcount) {
- MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
}
/* start receives for all of the sends */
if (m == 0) {
for (j = 0; j < NUM_SEND_TYPES * 2; j+=2) {
- MPI_Irecv (&buf[j * BUF_SIZE],
+ MPI_Irecv (&buf[j * BUF_SIZE],
BUF_SIZE, MPI_INT, 0, j, comm, &aReq[j]);
- MPI_Irecv (&buf[(j + 1) * BUF_SIZE],
- BUF_SIZE * sizeof(int),
+ MPI_Irecv (&buf[(j + 1) * BUF_SIZE],
+ BUF_SIZE * sizeof(int),
MPI_BYTE, 0, j + 1, comm, &aReq[j + 1]);
}
}
/* complete all of the receives... */
switch (l/2) {
- case 0:
+ case 0:
/* use MPI_Wait */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Wait (&aReq[j], &aStatus[j]);
}
break;
-
+
case 1:
/* use MPI_Waitall */
MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus);
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus);
}
-
+
break;
case 3:
/* use MPI_Waitsome */
total = 0;
while (total < NUM_SEND_TYPES * 2) {
- MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Waitsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
total += outcount;
}
break;
-
+
case 4:
/* use MPI_Test */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
-
+
while (!flag) {
MPI_Test (&aReq[j], &flag, &aStatus[j]);
}
}
-
+
break;
-
+
case 5:
/* use MPI_Testall */
flag = 0;
while (!flag) {
MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus);
}
-
+
break;
-
+
case 6:
/* use MPI_Testany */
for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
flag = 0;
while (!flag) {
- MPI_Testany (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testany (NUM_SEND_TYPES * 2, aReq,
&index, &flag, aStatus);
}
}
-
+
break;
-
+
case 7:
/* use MPI_Testsome */
total = 0;
outcount = 0;
while (!outcount) {
- MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
+ MPI_Testsome (NUM_SEND_TYPES * 2, aReq,
&outcount, indices, aStatus);
}
-
+
total += outcount;
}
-
+
break;
-
+
default:
assert (0);
break;
MPI_Buffer_detach (bbuf, &at_size);
- assert (at_size ==
+ assert (at_size ==
sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);
MPI_Finalize ();
char processor_name[128];
int namelen = 128;
int i;
- MPI_Group newgroup;
- MPI_Group newgroup2;
+ MPI_Group newgroup;
+ MPI_Group newgroup2;
MPI_Comm temp;
MPI_Comm intercomm = MPI_COMM_NULL;
MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
MPI_Comm_remote_group (intercomm, &newgroup);
-
+
MPI_Comm_free (&intercomm);
}
else {
}
MPI_Comm_free (&temp);
-
+
MPI_Group_free (&newgroup);
MPI_Barrier (comm);
MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
if (rank % 3) {
- MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
+ MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
(((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
INTERCOMM_CREATE_TAG, &intercomm);
}
MPI_Comm_free (&temp);
-
+
newgroup2 = newgroup;
MPI_Group_free (&newgroup2);
}
/* COMMITTING TWICE IS NOT AN ERROR - SEE:
http://www.mpi-forum.org/docs/mpi-20-html/node50.htm#Node50
- AT MOST, UMPIRE SHOULD PROVIDE A CLEAR WARNING ABOUT MINOR
+ AT MOST, UMPIRE SHOULD PROVIDE A CLEAR WARNING ABOUT MINOR
PERFORMANCE CONSEQUENCE (JUST A WASTED FUNCTION CALL)... */
MPI_Type_commit (&newtype);
int namelen = 128;
int i, j, k, basic_extent;
int blocklens[4], displs[4];
- MPI_Datatype structtypes[4];
- MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype structtypes[4];
+ MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
MPI_Request aReq[TYPE_CONSTRUCTOR_COUNT];
MPI_Status aStatus[TYPE_CONSTRUCTOR_COUNT];
#ifdef RUN_TYPE_STRUCT
/* create the types */
MPI_Type_struct (2, blocklens, displs, structtypes, &newtype[0]);
-
+
MPI_Type_extent (newtype[0], &basic_extent);
if (basic_extent != sizeof (test_basic_struct_t)) {
fprintf (stderr, "(%d): Unexpected extent for struct\n");
}
MPI_Type_vector (2, 3, 4, newtype[0], &newtype[1]);
- MPI_Type_hvector (3, 2, 15 * sizeof (test_basic_struct_t),
+ MPI_Type_hvector (3, 2, 15 * sizeof (test_basic_struct_t),
newtype[1], &newtype[2]);
displs[1] = 2;
MPI_Type_indexed (2, blocklens, displs, newtype[2], &newtype[3]);
MPI_Type_struct (4, blocklens, displs, structtypes, &newtype[6]);
-#ifdef RUN_TYPE_STRUCT
+#ifdef RUN_TYPE_STRUCT
MPI_Type_commit (&newtype[0]);
#endif
MPI_Type_commit (&newtype[5]);
#endif
-#ifdef RUN_TYPE_STRUCT_LB_UB
-#ifndef RUN_TYPE_STRUCT
+#ifdef RUN_TYPE_STRUCT_LB_UB
+#ifndef RUN_TYPE_STRUCT
/* need the struct type for the receive... */
MPI_Type_commit (&newtype[0]);
#endif
#endif
#ifdef RUN_TYPE_STRUCT_LB_UB
- MPI_Isend (&(struct_lb_ub_send_buf[0].the_double_to_send),
+ MPI_Isend (&(struct_lb_ub_send_buf[0].the_double_to_send),
MSG_COUNT, newtype[6], 1, 6, comm, &aReq[6]);
#else
aReq[6] = MPI_REQUEST_NULL;
#endif
#ifdef RUN_TYPE_STRUCT_LB_UB
- MPI_Irecv (struct_lb_ub_recv_buf,
+ MPI_Irecv (struct_lb_ub_recv_buf,
MSG_COUNT, newtype[0], 0, 6, comm, &aReq[6]);
#else
aReq[6] = MPI_REQUEST_NULL;
#ifdef RUN_TYPE_HVECTOR
/* eight holes in hvector_buf... */
/* hole in first vector, first block... */
- assert ((hvector_buf[i*44 + 3].the_double == 2.0) &&
+ assert ((hvector_buf[i*44 + 3].the_double == 2.0) &&
(hvector_buf[i*44 + 3].the_char == 'b'));
/* hole in second vector, first block... */
assert ((hvector_buf[i*44 + 10].the_double == 2.0) &&
int namelen = 128;
int i;
int blocklens[2], displs[2];
- MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
- MPI_Datatype newtype2[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype newtype2[TYPE_CONSTRUCTOR_COUNT];
/* init */
MPI_Init (&argc, &argv);
int namelen = 128;
int i;
int blocklens[2], displs[2];
- MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
- MPI_Datatype newtype2[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype newtype[TYPE_CONSTRUCTOR_COUNT];
+ MPI_Datatype newtype2[TYPE_CONSTRUCTOR_COUNT];
/* init */
MPI_Init (&argc, &argv);
MPI_Type_free (&newtype[j]);
}
}
-
+
MPI_Waitall (TYPES_TO_COMMIT, reqs, statuses);
}
}
//Try SMPI_SHARED_CALL function, which should call hash only once and for all.
char *str = strdup("onceandforall");
if(rank==size-1){
- SMPI_SHARED_CALL(hash,str,str,buf);
+ SMPI_SHARED_CALL(hash,str,str,buf);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
-double run_test(long long msg_size, MPI_Comm comm, test_t test_type,
+double run_test(long long msg_size, MPI_Comm comm, test_t test_type,
double * max_time)
{
int i, j;
/* Test that:
1: sbuf is large enough
2: rbuf is large enough
- 3: There were no failures (e.g., tmp nowhere > rbuf size
+ 3: There were no failures (e.g., tmp nowhere > rbuf size
*/
MPI_Barrier(comm);
start = MPI_Wtime();
{ MPI_OFFSET, "MPI_OFFSET" },
#endif
/* Size-specific types */
- /* Do not move MPI_REAL4 - this is used to indicate the very first
+ /* Do not move MPI_REAL4 - this is used to indicate the very first
optional type. In addition, you must not add any required types
after this type */
- /* See MPI 2.1, Section 16.2. These are required, predefined types.
+ /* See MPI 2.1, Section 16.2. These are required, predefined types.
If the type is not available (e.g., *only* because the Fortran
compiler does not support it), the value may be MPI_DATATYPE_NULL */
{ MPI_REAL4, "MPI_REAL4" },
/* Semi-optional types - if the compiler doesn't support long double
or long long, these might be MPI_DATATYPE_NULL */
{ MPI_LONG_DOUBLE, "MPI_LONG_DOUBLE" },
- { MPI_LONG_LONG_INT, "MPI_LONG_LONG_INT" },
+ { MPI_LONG_LONG_INT, "MPI_LONG_LONG_INT" },
{ MPI_LONG_LONG, "MPI_LONG_LONG" },
- { MPI_UNSIGNED_LONG_LONG, "MPI_UNSIGNED_LONG_LONG" },
+ { MPI_UNSIGNED_LONG_LONG, "MPI_UNSIGNED_LONG_LONG" },
{ MPI_LONG_DOUBLE_INT, "MPI_LONG_DOUBLE_INT" },
#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
/* added in MPI-2.2 */
int errs = 0;
MTest_Init( &argc, &argv );
-
+
/* Sample some datatypes */
/* See 8.4, "Naming Objects" in MPI-2. The default name is the same
as the datatype name */
inOptional = 0;
for (i=0; mpi_names[i].name != 0; i++) {
/* Are we in the optional types? */
- if (strcmp( mpi_names[i].name, "MPI_REAL4" ) == 0)
+ if (strcmp( mpi_names[i].name, "MPI_REAL4" ) == 0)
inOptional = 1;
/* If this optional type is not supported, skip it */
if (inOptional && mpi_names[i].dtype == MPI_DATATYPE_NULL) continue;
if (mpi_names[i].dtype == MPI_DATATYPE_NULL) {
- /* Report an error because all of the standard types
+ /* Report an error because all of the standard types
must be supported */
errs++;
- fprintf( stderr, "MPI Datatype %s is MPI_DATATYPE_NULL\n",
+ fprintf( stderr, "MPI Datatype %s is MPI_DATATYPE_NULL\n",
mpi_names[i].name );
continue;
}
MPI_Type_get_name( mpi_names[i].dtype, name, &namelen );
if (strncmp( name, mpi_names[i].name, namelen )) {
errs++;
- fprintf( stderr, "Expected %s but got %s\n",
+ fprintf( stderr, "Expected %s but got %s\n",
mpi_names[i].name, name );
}
}
-C -*- Mode: Fortran; -*-
+C -*- Mode: Fortran; -*-
C
C (C) 2003 by Argonne National Laboratory.
C See COPYRIGHT in top-level directory.
-C -*- Mode: Fortran; -*-
+C -*- Mode: Fortran; -*-
C
C (C) 2003 by Argonne National Laboratory.
C See COPYRIGHT in top-level directory.
* See COPYRIGHT in top-level directory.
*/
/*
- * This file contains the C routines used in testing the c2f and f2c
+ * This file contains the C routines used in testing the c2f and f2c
* handle conversion functions, except for MPI_File and MPI_Win (to
* allow working with MPI implementations that do not include those
* features).
#include "../../include/mpitestconf.h"
#include <string.h>
-/*
+/*
Name mapping. All routines are created with names that are lower case
with a single trailing underscore. This matches many compilers.
We use #define to change the name for Fortran compilers that do
- not use the lowercase/underscore pattern
+ not use the lowercase/underscore pattern
*/
#ifdef F77_NAME_UPPER
defined(F77_NAME_MIXED_USCORE)
/* Else leave name alone (routines have no underscore, so both
of these map to a lowercase, single underscore) */
-#else
+#else
#error 'Unrecognized Fortran name mapping'
#endif
int flag;
MPI_Test( &req, &flag, &status );
MPI_Test_cancelled( &status, &flag );
- if (!flag) {
+ if (!flag) {
fprintf( stderr, "Request: Wrong value for flag\n" );
return 1;
}
MPI_Fint c2fop_ ( MPI_Fint *op )
{
MPI_Op cOp = MPI_Op_f2c( *op );
-
+
if (cOp != MPI_SUM) {
fprintf( stderr, "Op: did not get sum\n" );
return 1;
return 0;
}
-/*
+/*
* The following routines provide handles to the calling Fortran program
*/
void f2ccomm_( MPI_Fint * comm )
{
MPI_Request cReq;
- MPI_Irecv( NULL, 0, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
+ MPI_Irecv( NULL, 0, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
MPI_COMM_WORLD, &cReq );
MPI_Cancel( &cReq );
*req = MPI_Request_c2f( cReq );
-
+
}
void f2cop_( MPI_Fint * op )
/*
Check that MPI_xxxx_c2f, applied to the same object several times,
- yields the same handle. We do this because when MPI handles in
- C are a different length than those in Fortran, care needs to
+ yields the same handle. We do this because when MPI handles in
+ C are a different length than those in Fortran, care needs to
be exercised to ensure that the mapping from one to another is unique.
(Test added to test a potential problem in ROMIO for handling MPI_File
on 64-bit systems)
MTest_Finalize( errs );
MPI_Finalize();
-
+
return 0;
}
* The tests follow this pattern:
*
* Fortran main program
- * calls the c routine f2ctype with each of the C types and the name of
- * the type. That c routine using MPI_Type_f2c to convert the
+ * calls the c routine f2ctype with each of the C types and the name of
+ * the type. That c routine using MPI_Type_f2c to convert the
* Fortran handle to a C handle, and then compares it to the corresponding
* C type, which is found by looking up the C handle by name
*
{ MPI_SHORT_INT, "MPI_SHORT_INT" },
{ MPI_2INT, "MPI_2INT" },
{ MPI_LONG_DOUBLE, "MPI_LONG_DOUBLE" },
- { MPI_LONG_LONG_INT, "MPI_LONG_LONG_INT" },
+ { MPI_LONG_LONG_INT, "MPI_LONG_LONG_INT" },
{ MPI_LONG_LONG, "MPI_LONG_LONG" },
- { MPI_UNSIGNED_LONG_LONG, "MPI_UNSIGNED_LONG_LONG" },
+ { MPI_UNSIGNED_LONG_LONG, "MPI_UNSIGNED_LONG_LONG" },
{ MPI_LONG_DOUBLE_INT, "MPI_LONG_DOUBLE_INT" },
{ 0, (char *)0 }, /* Sentinal used to indicate the last element */
};
-/*
+/*
Name mapping. All routines are created with names that are lower case
with a single trailing underscore. This matches many compilers.
We use #define to change the name for Fortran compilers that do
- not use the lowercase/underscore pattern
+ not use the lowercase/underscore pattern
*/
#ifdef F77_NAME_UPPER
defined(F77_NAME_MIXED_USCORE)
/* Else leave name alone (routines have no underscore, so both
of these map to a lowercase, single underscore) */
-#else
+#else
#error 'Unrecognized Fortran name mapping'
#endif
if (ctype != mpi_names[*typeidx].dtype) {
char mytypename[MPI_MAX_OBJECT_NAME];
int mytypenamelen;
- /* An implementation is not *required* to deliver the
- corresponding C version of the MPI Datatype bit-for-bit. But
+ /* An implementation is not *required* to deliver the
+ corresponding C version of the MPI Datatype bit-for-bit. But
if *must* act like it - e.g., the datatype name must be the same */
MPI_Type_get_name( ctype, mytypename, &mytypenamelen );
if (strcmp( mytypename, mpi_names[*typeidx].name ) != 0) {
mpi_names[*typeidx].name, mytypename, *fhandle, MPI_Type_c2f( ctype ) );
}
}
-
+
return errs;
}
-C -*- Mode: Fortran; -*-
+C -*- Mode: Fortran; -*-
C
C (C) 2003 by Argonne National Laboratory.
C See COPYRIGHT in top-level directory.
-C -*- Mode: Fortran; -*-
+C -*- Mode: Fortran; -*-
C
C (C) 2003 by Argonne National Laboratory.
C See COPYRIGHT in top-level directory.
* See COPYRIGHT in top-level directory.
*/
/*
- * This file contains the C routines used in testing the c2f and f2c
- * handle conversion functions for MPI_Win
+ * This file contains the C routines used in testing the c2f and f2c
+ * handle conversion functions for MPI_Win
*
* The tests follow this pattern:
*
#include "../../include/mpitestconf.h"
#include <string.h>
-/*
+/*
Name mapping. All routines are created with names that are lower case
with a single trailing underscore. This matches many compilers.
We use #define to change the name for Fortran compilers that do
- not use the lowercase/underscore pattern
+ not use the lowercase/underscore pattern
*/
#ifdef F77_NAME_UPPER
defined(F77_NAME_MIXED_USCORE)
/* Else leave name alone (routines have no underscore, so both
of these map to a lowercase, single underscore) */
-#else
+#else
#error 'Unrecognized Fortran name mapping'
#endif
return 0;
}
-/*
+/*
* The following routines provide handles to the calling Fortran program
*/
void f2cwin_( int *win )
* See COPYRIGHT in top-level directory.
*/
/*
- * This file contains the C routines used in testing the c2f and f2c
- * handle conversion functions for MPI_Win
+ * This file contains the C routines used in testing the c2f and f2c
+ * handle conversion functions for MPI_Win
*
* The tests follow this pattern:
*
#include "../../include/mpitestconf.h"
#include <string.h>
-/*
+/*
Name mapping. All routines are created with names that are lower case
with a single trailing underscore. This matches many compilers.
We use #define to change the name for Fortran compilers that do
- not use the lowercase/underscore pattern
+ not use the lowercase/underscore pattern
*/
#ifdef F77_NAME_UPPER
defined(F77_NAME_MIXED_USCORE)
/* Else leave name alone (routines have no underscore, so both
of these map to a lowercase, single underscore) */
-#else
+#else
#error 'Unrecognized Fortran name mapping'
#endif
return 0;
}
-/*
+/*
* The following routines provide handles to the calling Fortran program
*/
void f2cwin_( int *win )
/* include/mpitestconf.h.in. Generated from configure.ac by autoheader. */
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
+/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#define POINTERINT_t int
/* The size of `int', as computed by sizeof. */
-#define SIZEOF_INT
+#define SIZEOF_INT
/* The size of `long', as computed by sizeof. */
-#define SIZEOF_LONG
+#define SIZEOF_LONG
/* The size of `long long', as computed by sizeof. */
-#define SIZEOF_LONG_LONG
+#define SIZEOF_LONG_LONG
/* The size of `MPI_Offset', as computed by sizeof. */
-#define SIZEOF_MPI_OFFSET
+#define SIZEOF_MPI_OFFSET
/* The size of `short', as computed by sizeof. */
-#define SIZEOF_SHORT
+#define SIZEOF_SHORT
/* The size of `void *', as computed by sizeof. */
-#define SIZEOF_VOID_P
+#define SIZEOF_VOID_P
/* Define calling convention */
-#define STDCALL
+#define STDCALL
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
- int tab[2][3]={{1*rank,2*rank,3*rank},{7*rank,8*rank,9*rank}};
+ int tab[2][3]={{1*rank,2*rank,3*rank},{7*rank,8*rank,9*rank}};
MPI_Type_contiguous(3, MPI_INT, &type2);
MPI_Type_commit(&type2);
value.b=8.0;
}else{
value.a=10000;
- value.b=5.0;
+ value.b=5.0;
}
MPI_Bcast( &value, 1, mystruct, 0, MPI_COMM_WORLD );
/* Creates the constraints */
lmm_constraint_t *tmp_cnst = xbt_new0(lmm_constraint_t, 15);
- for (int i = 0; i < 15; i++)
+ for (int i = 0; i < 15; i++)
tmp_cnst[i] = lmm_constraint_new(Sys, nullptr, B[i]);
/* Creates the variables */
cnst[i] = lmm_constraint_new(Sys, NULL, float_random(10.0));
int l;
if(rate_no_limit>float_random(1.0))
- //Look at what happens when there is no concurrency limit
+ //Look at what happens when there is no concurrency limit
l=-1;
else
//Badly logarithmically random concurrency limit in [2^pw_base_limit+1,2^pw_base_limit+2^pw_max_limit]
}
unsigned int TestClasses [][4]=
- //Nbcnst Nbvar Baselimit Maxlimit
+ //Nbcnst Nbvar Baselimit Maxlimit
{{ 10 ,10 ,1 ,2 }, //small
{ 100 ,100 ,3 ,6 }, //medium
{ 2000,2000 ,5 ,8 }, //big
{ 20000,20000 ,7 ,10} //huge
- };
+ };
int main(int argc, char **argv)
{
acc_date2+=date*date;
}
- float mean_date= acc_date/(float)testcount;
+ float mean_date= acc_date/(float)testcount;
float stdev_date= sqrt(acc_date2/(float)testcount-mean_date*mean_date);
fprintf(stderr,
int main(void)
{
sem_t s;
- if (sem_init(&s, 0, 0) != 0)
+ if (sem_init(&s, 0, 0) != 0)
return 1;
return 0;
}
// printf("sem_open failed\n");
return 1;
}
-// printf("sem_open succeeded\n");
+// printf("sem_open succeeded\n");
return 0;
}
if (--iterate > 0)
y = growsdown(&y);
- /* The stack sometimes changes at the 0th level.
+ /* The stack sometimes changes at the 0th level.
* Original version did fail in this case, but I changed this around SimGrid 3.13 because of https://bugs.debian.org/814272
* Every arch failed on that day :(
*/