From: markls
Date: Tue, 12 Jun 2007 04:25:02 +0000 (+0000)
Subject: added smpi to cvs repository. still need to do a lot of integration work.
X-Git-Tag: v3.3~1761
X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/d47debaeffa6807956ee9cc7da90759efe49beda

added smpi to cvs repository. still need to do a lot of integration work.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@3599 48e7efb5-ca39-0410-a469-dd3cf9ba447f
---

diff --git a/src/smpi/include/smpi.h b/src/smpi/include/smpi.h
new file mode 100644
index 0000000000..26de7f2e47
--- /dev/null
+++ b/src/smpi/include/smpi.h
@@ -0,0 +1,121 @@
+#define DEFAULT_POWER 100
+
+#define MPI_ANY_SOURCE -1
+
+// error codes
+#define MPI_SUCCESS 0
+#define MPI_ERR_COMM 1
+#define MPI_ERR_ARG 2
+#define MPI_ERR_TYPE 3
+#define MPI_ERR_REQUEST 4
+#define MPI_ERR_INTERN 5
+#define MPI_ERR_COUNT 6
+#define MPI_ERR_RANK 7
+#define MPI_ERR_TAG 8
+
+#include <sys/types.h>
+#include <msg/msg.h>
+
+typedef enum { MPI_PORT = 0, SEND_SYNC_PORT, RECV_SYNC_PORT, MAX_CHANNEL } channel_t;
+
+// MPI_Comm
+struct smpi_mpi_communicator_t {
+  int id;
+  int size;
+  int barrier;
+  m_host_t *hosts;
+  m_process_t *processes;
+};
+typedef struct smpi_mpi_communicator_t smpi_mpi_communicator_t;
+typedef smpi_mpi_communicator_t *MPI_Comm;
+extern smpi_mpi_communicator_t smpi_mpi_comm_world;
+#define MPI_COMM_WORLD (&smpi_mpi_comm_world)
+
+// MPI_Status
+struct smpi_mpi_status_t {
+  int MPI_SOURCE;
+};
+typedef struct smpi_mpi_status_t smpi_mpi_status_t;
+typedef smpi_mpi_status_t MPI_Status;
+extern smpi_mpi_status_t smpi_mpi_status_ignore;
+#define MPI_STATUS_IGNORE (&smpi_mpi_status_ignore)
+
+// MPI_Datatype
+struct smpi_mpi_datatype_t {
+//  int type;
+  size_t size;
+};
+typedef struct smpi_mpi_datatype_t smpi_mpi_datatype_t;
+typedef smpi_mpi_datatype_t *MPI_Datatype;
+// FIXME: add missing datatypes
+extern smpi_mpi_datatype_t smpi_mpi_byte;
+#define MPI_BYTE (&smpi_mpi_byte)
+extern smpi_mpi_datatype_t smpi_mpi_int;
+#define MPI_INT (&smpi_mpi_int)
+extern smpi_mpi_datatype_t smpi_mpi_double;
+#define MPI_DOUBLE (&smpi_mpi_double)
+
+struct smpi_waitlist_node_t {
+  m_process_t process;
+  struct smpi_waitlist_node_t *next;
+};
+typedef struct smpi_waitlist_node_t smpi_waitlist_node_t;
+
+// FIXME: maybe it isn't appropriate to have the next pointer inside
+// MPI_Request
+struct smpi_mpi_request_t {
+  void *buf;
+  int count;
+  smpi_mpi_datatype_t *datatype;
+  int src;
+  int dst;
+  int tag;
+  smpi_mpi_communicator_t *comm;
+  short int completed;
+  smpi_waitlist_node_t *waitlist;
+  struct smpi_mpi_request_t *next;
+  int fwdthrough;
+};
+typedef struct smpi_mpi_request_t smpi_mpi_request_t;
+typedef smpi_mpi_request_t *MPI_Request;
+
+// MPI_Op
+struct smpi_mpi_op_t {
+  void (*func)(void *x, void *y, void *z);
+};
+typedef struct smpi_mpi_op_t smpi_mpi_op_t;
+typedef smpi_mpi_op_t *MPI_Op;
+extern smpi_mpi_op_t smpi_mpi_land;
+#define MPI_LAND (&smpi_mpi_land)
+extern smpi_mpi_op_t smpi_mpi_sum;
+#define MPI_SUM (&smpi_mpi_sum)
+
+// smpi_received_t
+struct smpi_received_t {
+  int commid;
+  int src;
+  int dst;
+  int tag;
+  int fwdthrough;
+  void *data;
+  struct smpi_received_t *next;
+};
+typedef struct smpi_received_t smpi_received_t;
+
+// sender/receiver (called by main routine)
+int smpi_sender(int argc, char *argv[]);
+int smpi_receiver(int argc, char *argv[]);
+
+// smpi functions
+int smpi_comm_rank(smpi_mpi_communicator_t *comm, m_host_t host);
+void smpi_isend(smpi_mpi_request_t*);
+void smpi_irecv(smpi_mpi_request_t*);
+void smpi_barrier(smpi_mpi_communicator_t *comm);
+void smpi_wait(smpi_mpi_request_t *request, smpi_mpi_status_t *status);
+void smpi_wait_all(int count, smpi_mpi_request_t **requests, smpi_mpi_status_t *statuses);
+void smpi_wait_all_nostatus(int count, smpi_mpi_request_t **requests);
+void smpi_bench_begin(void);
+void smpi_bench_end(void);
+int smpi_create_request(void *buf, int count, smpi_mpi_datatype_t *datatype, int src, int dst, int tag, smpi_mpi_communicator_t *comm, smpi_mpi_request_t **request);
+unsigned int smpi_sleep(unsigned int);
+void smpi_exit(int);
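For orientation, a minimal sketch of how the request primitives declared above are presumably meant to compose. This is a hypothetical illustration written against these prototypes, not code from this commit; the actual call sequence is part of the pending integration work, and example_send is an invented helper.

    // Hypothetical sketch, not part of this commit.
    // Assumes MPI_COMM_WORLD has been initialized by the runtime.
    void example_send(int rank, int dst, int tag) {
      int buf[4] = {0, 1, 2, 3};
      smpi_mpi_request_t *request = NULL;
      if (MPI_SUCCESS == smpi_create_request(buf, 4, MPI_INT, rank, dst, tag,
                                             MPI_COMM_WORLD, &request)) {
        smpi_isend(request);                    // start the nonblocking send
        smpi_wait(request, MPI_STATUS_IGNORE);  // block until it completes
      }
    }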
diff --git a/src/smpi/sample/allreduce.c b/src/smpi/sample/allreduce.c
new file mode 100644
index 0000000000..1f8cd884b4
--- /dev/null
+++ b/src/smpi/sample/allreduce.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+
+int main(int argc, char *argv[]) {
+  int rank, size;
+  int i;
+  int *sendbuf, *recvbuf;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  sendbuf = malloc(sizeof(int) * size);
+  recvbuf = malloc(sizeof(int) * size);
+  for (i = 0; i < size; i++) {
+    sendbuf[i] = 0;
+    recvbuf[i] = 0;
+  }
+  sendbuf[rank] = rank + 1;
+  MPI_Allreduce(sendbuf, recvbuf, size, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  printf("node %d has: ", rank);
+  for (i = 0; i < size; i++) {
+    printf("%d ", recvbuf[i]);
+  }
+  printf("\n");
+  free(sendbuf);
+  free(recvbuf);
+  MPI_Finalize();
+  return 0;
+}
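Worked expectation for this sample: with three processes, rank r contributes r + 1 at index r and zero elsewhere, so the element-wise MPI_SUM leaves every node with the same vector and each prints "node <rank> has: 1 2 3".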
diff --git a/src/smpi/sample/alltoall.c b/src/smpi/sample/alltoall.c
new file mode 100644
index 0000000000..c11a6dd416
--- /dev/null
+++ b/src/smpi/sample/alltoall.c
@@ -0,0 +1,173 @@
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+#endif
+
+// sandler says, compile with mpicc -v alltoalldemo.c
+// run with mpirun -np 3 a.out -m 5
+
+int main( int argc, char *argv[] )
+{
+  int rank, size;
+  int chunk = 128;
+  int i;
+  int j; // added by sandler
+  int *sb;
+  int *rb;
+  int status, gstatus;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  if (rank == 0) {
+    printf("size: %d\n", size);
+  }
+  for ( i = 1; i < argc; ++i ) {
+    if ( argv[i][0] != '-' ) {
+      // added by sandler
+      fprintf(stderr, "Unrecognized option %s\n", argv[i]); fflush(stderr);
+      continue;
+    }
+    switch (argv[i][1]) {
+    case 'm':
+      chunk = atoi(argv[++i]);
+      if (rank == 0) {
+        printf("chunk: %d\n", chunk);
+      }
+      break;
+    default:
+      fprintf(stderr, "Unrecognized argument %s\n", argv[i]); fflush(stderr);
+      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+    }
+  }
+  sb = (int *)malloc(size*chunk*sizeof(int));
+  if ( !sb ) {
+    perror( "can't allocate send buffer" ); fflush(stderr);
+    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+  }
+  rb = (int *)malloc(size*chunk*sizeof(int));
+  if ( !rb ) {
+    perror( "can't allocate recv buffer" ); fflush(stderr);
+    free(sb);
+    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+  }
+
+  /* original deino.net:
+  for ( i=0 ; i < size*chunk ; ++i ) {
+    sb[i] = sb[i] = rank + 1;
+    rb[i] = 0;
+  }
+  */
+  // written by sandler
+
+  if (rank == 0) printf("note in the following:\n"
+    "if you were to compare the sending buffer and the receiving buffer on the SAME processor, \n"
+    "you might think that the values were getting wiped out. However, each row IS going somewhere. \n"
+    "The 0th row of processor 0 goes to the 0th row of processor 0\n"
+    "The 1st row of processor 0 goes to the 0th row of processor 1. (Go look at rb for processor 1!)\n"
+    "\n"
+    "Too bad the values don't come out in a deterministic order. That's life!\n"
+    "\n"
+    "Now look at the receiving buffer for processor 0.\n"
+    "The 0th row is from processor 0 (itself).\n"
+    "The 1st row on processor 0 is from the 0th row on processor 1. (Go look at the sb of processor 1!)\n"
+    "\n"
+    "Apparently this is the intended behavior.\n"
+    "\n"
+    "Note that each row is always moved as one chunk, unchangeable.\n"
+    "\n"
+    "TODO: draw a diagram\n"
+  );
+
+  for (i=0; i

diff --git a/src/smpi/sample/bcast.c b/src/smpi/sample/bcast.c
new file mode 100644
--- /dev/null
+++ b/src/smpi/sample/bcast.c
+#include <stdio.h>
+#include <mpi.h>
+
+int main (int argc, char **argv) {
+  int size, rank;
+  int value = 3;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  if (0 == rank) {
+    value = 17;
+  }
+  printf("node %d has value %d\n", rank, value);
+  MPI_Bcast(&value, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  printf("node %d has value %d\n", rank, value);
+  MPI_Finalize();
+  return 0;
+}
diff --git a/src/smpi/sample/bcbench.c b/src/smpi/sample/bcbench.c
new file mode 100644
index 0000000000..85d36565b0
--- /dev/null
+++ b/src/smpi/sample/bcbench.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <mpi.h>
+
+#define GETTIMEOFDAY_ERROR 1
+
+#define N_START 1
+#define N_STOP 1024*1024
+#define N_NEXT (N*2)
+#define ITER 100
+#define ONE_MILLION 1000000.0
+#define RAND_SEED 842270
+
+int main(int argc, char* argv[]) {
+
+  int size, rank;
+  int N, I;
+  struct timeval *start_time, *stop_time;
+  double seconds;
+  int i, j, k;
+  char *buffer;
+  int check;
+
+  srandom(RAND_SEED);
+
+  MPI_Init(&argc, &argv);
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  if (0 == rank) {
+    start_time = (struct timeval *)malloc(sizeof(struct timeval));
+    stop_time = (struct timeval *)malloc(sizeof(struct timeval));
+  }
+
+  for (N = N_START; N <= N_STOP; N = N_NEXT) {
+
+    buffer = malloc(sizeof(char) * N);
+
+    if (0 == rank) {
+      for (j = 0; j < N; j++) {
+        buffer[j] = (char)(random() % 256);
+      }
+      if (-1 == gettimeofday(start_time, NULL)) {
+        printf("couldn't set start_time on node 0!\n");
+        MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+        exit(EXIT_FAILURE);
+      }
+    }
+
+    for (i = 0; i < ITER; i++) {
+      MPI_Bcast(buffer, N, MPI_BYTE, 0, MPI_COMM_WORLD);
+      if (0 == rank) {
+        for (j = 1; j < size; j++) {
+          MPI_Recv(&check, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        }
+      } else {
+        MPI_Send(&rank, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
+      }
+    }
+
+    if (0 == rank) {
+      if (-1 == gettimeofday(stop_time, NULL)) {
+        printf("couldn't set stop_time on node 0!\n");
+        MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+        exit(EXIT_FAILURE);
+      }
+      seconds = (double)(stop_time->tv_sec - start_time->tv_sec) + (double)(stop_time->tv_usec - start_time->tv_usec) / ONE_MILLION;
+    }
+
+    free(buffer);
+
+    if (0 == rank) {
+      printf("N: %10d, iter: %d, time: %10f s, avg rate: %12f Mbps\n", N, ITER, seconds, ((double)N * ITER * 8) / (1024.0 * 1024.0 * seconds));
+    }
+
+  }
+
+  if (0 == rank) {
+    free(start_time);
+    free(stop_time);
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
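The rate printed above follows directly from the final printf: rate = (N * ITER * 8) / (1024 * 1024 * seconds), in megabits per second. As a sanity check with assumed numbers: if the N = 1048576 round took 10 seconds for ITER = 100 broadcasts, the reported rate would be (1048576 * 100 * 8) / (1048576 * 10) = 80 Mbps.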
diff --git a/src/smpi/sample/first.c b/src/smpi/sample/first.c
new file mode 100644
index 0000000000..d78aeac026
--- /dev/null
+++ b/src/smpi/sample/first.c
@@ -0,0 +1,56 @@
+/* A first simple SPMD example program using MPI */
+
+/* The program consists of one receiver process and N-1 sender  */
+/* processes. The sender processes send a message consisting    */
+/* of their process identifier (id) and the total number of     */
+/* processes (ntasks) to the receiver. The receiver process     */
+/* prints out the values it receives in the messages from the   */
+/* senders. */
+
+/* Compile the program with 'mpicc first.c -o first' */
+/* To run the program using four of the computers specified in */
+/* your hostfile, do 'mpirun -machinefile hostfile -np 4 first' */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+
+int main(int argc, char *argv[]) {
+  const int tag = 42;    /* Message tag */
+  int id, ntasks, source_id, dest_id, err, i;
+  MPI_Status status;
+  int msg[2];            /* Message array */
+
+  err = MPI_Init(&argc, &argv);    /* Initialize MPI */
+  if (err != MPI_SUCCESS) {
+    printf("MPI initialization failed!\n");
+    exit(1);
+  }
+  err = MPI_Comm_size(MPI_COMM_WORLD, &ntasks);  /* Get nr of tasks */
+  err = MPI_Comm_rank(MPI_COMM_WORLD, &id);      /* Get id of this process */
+  if (ntasks < 2) {
+    printf("You have to use at least 2 processors to run this program\n");
+    MPI_Finalize();    /* Quit if there is only one processor */
+    exit(0);
+  }
+
+  if (id == 0) {    /* Process 0 (the receiver) does this */
+    for (i=1; i

diff --git a/src/smpi/sample/matmul.c b/src/smpi/sample/matmul.c
new file mode 100644
--- /dev/null
+++ b/src/smpi/sample/matmul.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <libgen.h>
+#include <mpi.h>
+
+#define ITERATIONS 10
+#define STEPS 1
+#define STEP_SIZE 0
+
+#define USAGE_ERROR 1
+#define MALLOC_ERROR 2
+#define GETTIMEOFDAY_ERROR 3
+
+void * checked_malloc(int rank, const char * varname, size_t size) {
+  void * ptr;
+  ptr = malloc(size);
+  if (NULL == ptr) {
+    printf("node %d could not malloc memory for %s.\n", rank, varname);
+    MPI_Abort(MPI_COMM_WORLD, MALLOC_ERROR);
+    exit(MALLOC_ERROR);
+  }
+  return ptr;
+}
+
+int main(int argc, char* argv[]) {
+
+  // timing/system variables
+  int iteration, iterations = ITERATIONS;
+  int step, steps = STEPS, step_size = STEP_SIZE;
+  long usecs, total_usecs;
+  struct timeval *start_time, *stop_time;
+  char *program;
+
+  // mpi/communications variables
+  int rank;
+  int row, col;
+  MPI_Comm row_comm, col_comm;
+
+  // algorithm variables
+  int N_start, N, P;
+  int *A, *A_t, *B, *C, *D, *a, *b, *abuf, *bbuf;
+  int n, i, j, k, I, J;
+
+  MPI_Init(&argc, &argv);
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  if (0 == rank) {
+    int size;
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+    program = basename(argv[0]);
+
+    // root node parses cmdline args
+    /*
+    if (3 > argc || !isdigit(*argv[1]) || !isdigit(*argv[2])) {
+      printf("usage:\n%s <N> <P> [<iterations> [<steps> [<step_size>]]]\n", program);
+      MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
+      exit(USAGE_ERROR);
+    }
+    */
+
+    //N_start = atoi(argv[1]);
+    //P = atoi(argv[2]);
+    N_start = 100;
+    P = 2;
+
+    /*
+    if (4 <= argc && isdigit(*argv[3])) {
+      iterations = atoi(argv[3]);
+    }
+
+    if (5 <= argc && isdigit(*argv[4])) {
+      steps = atoi(argv[4]);
+    }
+
+    if (6 <= argc && isdigit(*argv[5])) {
+      step_size = atoi(argv[5]);
+    }
+    */
+
+    if (P*P != size) {
+      printf("P^2 must equal size.\n");
+      MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
+      exit(USAGE_ERROR);
+    }
+
+    start_time = (struct timeval *)checked_malloc(rank, "start_time", sizeof(struct timeval));
+    stop_time = (struct timeval *)checked_malloc(rank, "stop_time", sizeof(struct timeval));
+
+  }
+
+  // send command line parameters except N, since it can vary
+  MPI_Bcast(&P, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&steps, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&step_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  row = rank / P;
+  col = rank % P;
+
+  // create row/column communicators
+  MPI_Comm_split(MPI_COMM_WORLD, row, col, &row_comm);
+  MPI_Comm_split(MPI_COMM_WORLD, col, row, &col_comm);
+
+  for (step = 0; step < steps; step++) {
+
+    total_usecs = 0;
+
+    if (0 == rank) {
+      N = N_start + step * step_size;
+      if ((N/P)*P != N) {
+        printf("P must divide N and %d does not divide %d.\n", P, N);
+        N = -1;
+      }
+    }
+
+    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+    // if root passes N = -1, skip this round
+    if (-1 == N) continue;
+
+    n = N / P;
+
+    // initialize matrix components
+    A   = (int *)checked_malloc(rank, "A", n*n*sizeof(int));
+    A_t = (int *)checked_malloc(rank, "A_t", n*n*sizeof(int));
+    B   = (int *)checked_malloc(rank, "B", n*n*sizeof(int));
+    C   = (int *)checked_malloc(rank, "C", n*n*sizeof(int));
+    D   = (int *)checked_malloc(rank, "D", n*n*sizeof(int));
+
+    for (i = 0; i < n; i++) {
+      for (j = 0; j < n; j++) {
+
+        I = n*row+i;
+        J = n*col+j;
+
+        A[n*i+j] = I+J;
+        B[n*i+j] = I;
+
+        // D is the check matrix
+        D[n*i+j] = 0;
+        for (k = 0; k < N; k++) {
+          // A[I,k] = I+k
+          // B[k,J] = k
+          D[n*i+j] += (I+k) * k;
+        }
+
+      }
+    }
+
+    // buffers
+    abuf = (int *)checked_malloc(rank, "abuf", n*sizeof(int));
+    bbuf = (int *)checked_malloc(rank, "bbuf", n*sizeof(int));
+
+    for (iteration = 0; iteration < iterations; iteration++) {
+
+      for (i = 0; i < n*n; i++) {
+        C[i] = 0;
+      }
+
+      // node zero sets start time
+      if (0 == rank && -1 == gettimeofday(start_time, NULL)) {
+        printf("couldn't set start_time on node 0!\n");
+        MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+        exit(GETTIMEOFDAY_ERROR);
+      }
+
+      // populate transpose of A
+      for (i = 0; i < n; i++) {
+        for (j = 0; j < n; j++) {
+          A_t[n*i+j] = A[n*j+i];
+        }
+      }
+
+      // perform calculations
+      for (k = 0; k < N; k++) {
+
+        if (k/n == col) {
+          a = A_t + n*(k%n);
+        } else {
+          a = abuf;
+        }
+
+        if (k/n == row) {
+          b = B + n*(k%n);
+        } else {
+          b = bbuf;
+        }
+
+        MPI_Bcast(a, n, MPI_INT, k/n, row_comm);
+        MPI_Bcast(b, n, MPI_INT, k/n, col_comm);
+
+        for (i = 0; i < n; i++) {
+          for (j = 0; j < n; j++) {
+            C[n*i+j] += a[i] * b[j];
+          }
+        }
+
+      } // for k
+
+      // node zero sets stop time
+      if (0 == rank && -1 == gettimeofday(stop_time, NULL)) {
+        printf("couldn't set stop_time on node 0!\n");
+        MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+        exit(GETTIMEOFDAY_ERROR);
+      }
+
+      // check calculation
+      for (i = 0; i < n*n && C[i] == D[i]; i++);
+      j = (n*n == i);
+      MPI_Reduce(&j, &k, 1, MPI_INT, MPI_LAND, 0, MPI_COMM_WORLD);
+
+      // node zero prints stats
+      if (0 == rank) {
+        usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
+        printf("prog: %s, N: %d, P: %d, procs: %d, time: %ld us, check: %d\n", program, N, P, P*P, usecs, k);
+        total_usecs += usecs;
+      }
+
+    }
+
+    // node 0 prints final stats
+    if (0 == rank) {
+      printf("prog: %s, N: %d, P: %d, procs: %d, iterations: %d, avg. time: %ld us\n",
+          program, N, P, P*P, iterations, total_usecs / iterations);
+    }
+
+    // free data structures
+    free(A);
+    free(A_t);
+    free(B);
+    free(C);
+    free(D);
+    free(abuf);
+    free(bbuf);
+
+  }
+
+  if (0 == rank) {
+    free(start_time);
+    free(stop_time);
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/src/smpi/sample/mvmul.c b/src/smpi/sample/mvmul.c
new file mode 100644
index 0000000000..aa32e30f22
--- /dev/null
+++ b/src/smpi/sample/mvmul.c
@@ -0,0 +1,218 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/time.h>
+#include <mpi.h>
+
+#define ITERATIONS 10
+
+#define USAGE_ERROR 1
+#define SANITY_ERROR 2
+#define GETTIMEOFDAY_ERROR 3
+
+int main(int argc, char* argv[]) {
+
+  int size, rank;
+  int N, n, i, j, k, current_iteration, successful_iterations = 0;
+  double *matrix, *vector, *vcalc, *vcheck;
+  MPI_Status status;
+  struct timeval *start_time, *stop_time;
+  long parallel_usecs, parallel_usecs_total = 0, sequential_usecs, sequential_usecs_total = 0;
+
+  MPI_Init(&argc, &argv);
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  if (0 == rank) {
+
+    // root node parses cmdline args
+    if (2 > argc || !isdigit(*argv[1])) {
+      printf("usage:\n%s <N>\n", argv[0]);
+      MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
+      exit(USAGE_ERROR);
+    }
+
+    N = atoi(argv[1]);
+
+    start_time = (struct timeval *)malloc(sizeof(struct timeval));
+    stop_time = (struct timeval *)malloc(sizeof(struct timeval));
+
+  }
+
+  for (current_iteration = 0; current_iteration < ITERATIONS; current_iteration++) {
+
+    if (0 == rank) {
+
+      matrix = (double *)malloc(N*N*sizeof(double));
+      vector = (double *)malloc(N*sizeof(double));
+
+      for (i = 0; i < N*N; i++) {
+        matrix[i] = (double)rand()/((double)RAND_MAX + 1);
+      }
+
+      for (i = 0; i < N; i++) {
+        vector[i] = (double)rand()/((double)RAND_MAX + 1);
+      }
+
+      // for the sake of argument, the parallel algorithm begins
+      // when the root node begins to transmit the matrix to the
+      // workers.
+      if (-1 == gettimeofday(start_time, NULL)) {
+        printf("couldn't set start_time on node 0!\n");
+        MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+        exit(GETTIMEOFDAY_ERROR);
+      }
+
+      for (i = 1; i < size; i++) {
+        MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
+      }
+
+    } else {
+      MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
+    }
+
+    // this algorithm uses at most N processors...
+    if (rank < N) {
+
+      if (size > N) size = N;
+      n = N / size + ((rank < (N % size)) ? 1 : 0);
+
+      if (0 == rank) {
+
+        for (i = 1, j = n; i < size && j < N; i++, j += k) {
+          k = N / size + ((i < (N % size)) ? 1 : 0);
+          MPI_Send(matrix+N*j, N*k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
+          MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
+        }
+
+        // sanity check
+#ifdef DEBUG
+        if (i != size || j != N) {
+          printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
+          MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
+          exit(SANITY_ERROR);
+        }
+#endif
+
+        vcalc = (double *)malloc(N*sizeof(double));
+
+      } else {
+
+        matrix = (double *)malloc(N*n*sizeof(double));
+        vector = (double *)malloc(N*sizeof(double));
+        vcalc = (double *)malloc(n*sizeof(double));
+
+        MPI_Recv(matrix, N*n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
+        MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
+
+      }
+
+      for (i = 0; i < n; i++) {
+        vcalc[i] = 0.0;
+        for (j = 0; j < N; j++) {
+          vcalc[i] += matrix[N*i+j] * vector[j];
+        }
+      }
+
+      if (0 != rank) {
+        MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
+      } else {
+
+        for (i = 1, j = n; i < size && j < N; i++, j += k) {
+          k = N / size + ((i < (N % size)) ? 1 : 0);
+          MPI_Recv(vcalc+j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
+        }
+
+        // sanity check
+#ifdef DEBUG
+        if (i != size || j != N) {
+          printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
+          MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
+          exit(SANITY_ERROR);
+        }
+#endif
+
+        if (-1 == gettimeofday(stop_time, NULL)) {
+          printf("couldn't set stop_time on node 0!\n");
+          MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+          exit(GETTIMEOFDAY_ERROR);
+        }
+
+        parallel_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
+
+        if (-1 == gettimeofday(start_time, NULL)) {
+          printf("couldn't set start_time on node 0!\n");
+          MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+          exit(GETTIMEOFDAY_ERROR);
+        }
+
+        // calculate serially
+        vcheck = (double *)malloc(N*sizeof(double));
+        for (i = 0; i < N; i++) {
+          vcheck[i] = 0.0;
+          for (j = 0; j < N; j++) {
+            vcheck[i] += matrix[N*i+j] * vector[j];
+          }
+        }
+
+        if (-1 == gettimeofday(stop_time, NULL)) {
+          printf("couldn't set stop_time on node 0!\n");
+          MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
+          exit(GETTIMEOFDAY_ERROR);
+        }
+
+        sequential_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
+
+        // verify correctness
+        for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
+
+        printf("prog: blocking, i: %d ", current_iteration);
+
+        if (i == N) {
+          printf("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
+              parallel_usecs,
+              sequential_usecs,
+              (double)sequential_usecs / (double)parallel_usecs,
+              size,
+              (double)sequential_usecs / ((double)parallel_usecs * (double)size));
+
+          parallel_usecs_total += parallel_usecs;
+          sequential_usecs_total += sequential_usecs;
+          successful_iterations++;
+        } else {
+          printf("parallel calc != serial calc, ");
+        }
+
+        free(vcheck);
+
+      }
+
+      free(matrix);
+      free(vector);
+      free(vcalc);
+    }
+
+  }
+
+  if (0 == rank) {
+    printf("prog: blocking, ");
+    if (0 < successful_iterations) {
+      printf("iterations: %d, avg. ptime: %ld us, avg. stime: %ld us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
+          successful_iterations,
+          parallel_usecs_total / successful_iterations,
+          sequential_usecs_total / successful_iterations,
+          (double)sequential_usecs_total / (double)parallel_usecs_total,
+          size,
+          (double)sequential_usecs_total / ((double)parallel_usecs_total * (double)size));
+    } else {
+      printf("no successful iterations!\n");
+    }
+
+    free(start_time);
+    free(stop_time);
+
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
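The row distribution above, n = N / size + ((rank < (N % size)) ? 1 : 0), hands the first N % size ranks one extra row so nothing is left over. For N = 10 on size = 4 processes, ranks 0 and 1 each compute 3 rows and ranks 2 and 3 compute 2, accounting for all 10.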
diff --git a/src/smpi/sample/reduce.c b/src/smpi/sample/reduce.c
new file mode 100644
index 0000000000..9fc0be2483
--- /dev/null
+++ b/src/smpi/sample/reduce.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+
+int main(int argc, char *argv[]) {
+  int rank, size;
+  int i;
+  int *sendbuf, *recvbuf;
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  sendbuf = malloc(sizeof(int) * size);
+  recvbuf = malloc(sizeof(int) * size);
+  for (i = 0; i < size; i++) {
+    sendbuf[i] = 0;
+    recvbuf[i] = 0;
+  }
+  sendbuf[rank] = rank + 1;
+  MPI_Reduce(sendbuf, recvbuf, size, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (0 == rank) {
+    printf("nodes: ");
+    for (i = 0; i < size; i++) {
+      printf("%d ", recvbuf[i]);
+    }
+    printf("\n");
+  }
+  free(sendbuf);
+  free(recvbuf);
+  MPI_Finalize();
+  return 0;
+}
diff --git a/src/smpi/sample/ring_c.c b/src/smpi/sample/ring_c.c
new file mode 100644
index 0000000000..c301ab33b5
--- /dev/null
+++ b/src/smpi/sample/ring_c.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2006      Cisco Systems, Inc. All rights reserved.
+ *
+ * Simple ring test program
+ */
+
+#include <stdio.h>
+#include "mpi.h"
+
+int main(int argc, char *argv[])
+{
+  int rank, size, next, prev, message, tag = 201;
+
+  /* Start up MPI */
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  /* Calculate the rank of the next process in the ring. Use the
+     modulus operator so that the last process "wraps around" to
+     rank zero. */
+
+  next = (rank + 1) % size;
+  prev = (rank + size - 1) % size;
+
+  /* If we are the "master" process (i.e., MPI_COMM_WORLD rank 0),
+     put the number of times to go around the ring in the
+     message. */
+
+  if (0 == rank) {
+    message = 10;
+
+    printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n",
+           message, next, tag, size);
+    MPI_Send(&message, 1, MPI_INT, next, tag, MPI_COMM_WORLD);
+    printf("Process 0 sent to %d\n", next);
+  }
+
+  /* Pass the message around the ring. The exit mechanism works as
+     follows: the message (a positive integer) is passed around the
+     ring. Each time it passes rank 0, it is decremented. When a
+     process receives a message containing a 0 value, it passes the
+     message on to the next process and then quits. By passing the 0
+     message first, every process gets the 0 message and can quit
+     normally. */
+
+  sleep(3);
+
+  while (1) {
+    MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD,
+             MPI_STATUS_IGNORE);
+
+    if (0 == rank) {
+      --message;
+      printf("Process 0 decremented value: %d\n", message);
+    }
+
+    MPI_Send(&message, 1, MPI_INT, next, tag, MPI_COMM_WORLD);
+    if (0 == message) {
+      printf("Process %d exiting\n", rank);
+      break;
+    }
+  }
+
+  /* The last process does one extra send to process 0, which needs
+     to be received before the program can exit */
+
+  if (0 == rank) {
+    MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD,
+             MPI_STATUS_IGNORE);
+  }
+
+  /* All done */
+
+  MPI_Finalize();
+  return 0;
+}
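Concretely, with 4 processes: rank 0 injects 10, and the value circulates 0 -> 1 -> 2 -> 3 -> 0, dropping by one each time it passes rank 0. On the lap where rank 0 sends 0, it exits its loop after the send; ranks 1-3 each receive the 0, forward it, and exit; and the extra MPI_Recv at the end lets rank 0 drain rank 3's final send before MPI_Finalize.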

diff --git a/src/smpi/scripts/smpicc b/src/smpi/scripts/smpicc
new file mode 100755
index 0000000000..87700d60e6
--- /dev/null
+++ b/src/smpi/scripts/smpicc
@@ -0,0 +1,80 @@
+#!/bin/sh
+#FIXME: .. paths...
+SIMGRID_INCLUDE="${SIMGRID_HOME}/include"
+SIMGRID_LIB="${SIMGRID_HOME}/lib"
+CC="gcc"
+
+SMPI_INCLUDE="${SMPI_HOME}/include"
+SMPI_LIB="${SMPI_HOME}/lib"
+SEED="221238"
+
+TMPDIR="$(mktemp -d tmpXXXXXXX)"
+
+function modsource {
+    SOURCE="$1"
+    SOURCEFILE="$(basename ${SOURCE})"
+    SOURCEDIR="${SOURCE%${SOURCEFILE}}"
+    if [ -n "${SOURCEDIR}" ]; then
+        mkdir -p ${TMPDIR}${SOURCEDIR}
+    fi
+    TMPSOURCE="${TMPDIR}${SOURCE}"
+    cat > ${TMPSOURCE} <
> ${TMPSOURCE}
+    grep -q "smpi_main" ${TMPSOURCE}
+    if [ $? -eq 0 ]; then
+        cat >> ${TMPSOURCE} <