X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/fce65f194ca0bb025602ff52929597c76a99bbfb..2114c044f7a785c1c53c0f69d0203fd50c2175d5:/src/smpi/smpi_mpi_dt.c diff --git a/src/smpi/smpi_mpi_dt.c b/src/smpi/smpi_mpi_dt.c index 1477cec75c..54eecc2710 100644 --- a/src/smpi/smpi_mpi_dt.c +++ b/src/smpi/smpi_mpi_dt.c @@ -1,7 +1,7 @@ /* smpi_mpi_dt.c -- MPI primitives to handle datatypes */ /* FIXME: a very incomplete implementation */ -/* Copyright (c) 2009, 2010. The SimGrid Team. +/* Copyright (c) 2009-2014. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -10,31 +10,42 @@ #include #include #include - +#include #include "private.h" #include "smpi_mpi_dt_private.h" +#include "mc/mc.h" +#include "xbt/replay.h" +#include "simgrid/modelchecker.h" XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_mpi_dt, smpi, "Logging specific to SMPI (datatype)"); +#define INTSIZEDCHAR (sizeof(int)*CHAR_BIT-1)/3 + 3 +xbt_dict_t smpi_type_keyvals = NULL; +int type_keyval_id=0;//avoid collisions + #define CREATE_MPI_DATATYPE(name, type) \ static s_smpi_mpi_datatype_t mpi_##name = { \ + (char*) # name, \ sizeof(type), /* size */ \ 0, /*was 1 has_subtype*/ \ 0, /* lb */ \ sizeof(type), /* ub = lb + size */ \ DT_FLAG_BASIC, /* flags */ \ - NULL /* pointer on extended struct*/ \ + NULL, /* attributes */ \ + NULL, /* pointer on extended struct*/ \ }; \ MPI_Datatype name = &mpi_##name; #define CREATE_MPI_DATATYPE_NULL(name) \ static s_smpi_mpi_datatype_t mpi_##name = { \ + (char*) # name, \ 0, /* size */ \ 0, /*was 1 has_subtype*/ \ 0, /* lb */ \ 0, /* ub = lb + size */ \ DT_FLAG_BASIC, /* flags */ \ + NULL, /* attributes */ \ NULL /* pointer on extended struct*/ \ }; \ MPI_Datatype name = &mpi_##name; @@ -48,6 +59,10 @@ typedef struct { float value; float index; } float_float; +typedef struct { + long value; + long index; +} long_long; typedef struct { double value; double index; @@ -72,7 +87,10 @@ typedef struct { long double value; int index; } long_double_int; - +typedef struct { + int64_t value; + int64_t index; +} integer128_t; // Predefined data types CREATE_MPI_DATATYPE(MPI_CHAR, char); CREATE_MPI_DATATYPE(MPI_SHORT, short); @@ -90,6 +108,7 @@ CREATE_MPI_DATATYPE(MPI_DOUBLE, double); CREATE_MPI_DATATYPE(MPI_LONG_DOUBLE, long double); CREATE_MPI_DATATYPE(MPI_WCHAR, wchar_t); CREATE_MPI_DATATYPE(MPI_C_BOOL, _Bool); +CREATE_MPI_DATATYPE(MPI_BYTE, int8_t); CREATE_MPI_DATATYPE(MPI_INT8_T, int8_t); CREATE_MPI_DATATYPE(MPI_INT16_T, int16_t); CREATE_MPI_DATATYPE(MPI_INT32_T, int32_t); @@ -111,23 +130,41 @@ CREATE_MPI_DATATYPE(MPI_SHORT_INT, short_int); CREATE_MPI_DATATYPE(MPI_2INT, int_int); CREATE_MPI_DATATYPE(MPI_2FLOAT, float_float); CREATE_MPI_DATATYPE(MPI_2DOUBLE, double_double); +CREATE_MPI_DATATYPE(MPI_2LONG, long_long); + +CREATE_MPI_DATATYPE(MPI_REAL, float); +CREATE_MPI_DATATYPE(MPI_REAL4, float); +CREATE_MPI_DATATYPE(MPI_REAL8, float); +CREATE_MPI_DATATYPE(MPI_REAL16, double); +CREATE_MPI_DATATYPE_NULL(MPI_COMPLEX8); +CREATE_MPI_DATATYPE_NULL(MPI_COMPLEX16); +CREATE_MPI_DATATYPE_NULL(MPI_COMPLEX32); +CREATE_MPI_DATATYPE(MPI_INTEGER1, int); +CREATE_MPI_DATATYPE(MPI_INTEGER2, int16_t); +CREATE_MPI_DATATYPE(MPI_INTEGER4, int32_t); +CREATE_MPI_DATATYPE(MPI_INTEGER8, int64_t); +CREATE_MPI_DATATYPE(MPI_INTEGER16, integer128_t); CREATE_MPI_DATATYPE(MPI_LONG_DOUBLE_INT, long_double_int); CREATE_MPI_DATATYPE_NULL(MPI_UB); CREATE_MPI_DATATYPE_NULL(MPI_LB); -CREATE_MPI_DATATYPE_NULL(MPI_PACKED); +CREATE_MPI_DATATYPE(MPI_PACKED, char); // Internal use only CREATE_MPI_DATATYPE(MPI_PTR, void*); +/** Check if the datatype is usable for communications + */ +int is_datatype_valid(MPI_Datatype datatype) { + return datatype != MPI_DATATYPE_NULL + && (datatype->flags & DT_FLAG_COMMITED); +} size_t smpi_datatype_size(MPI_Datatype datatype) { return datatype->size; } - - MPI_Aint smpi_datatype_lb(MPI_Datatype datatype) { return datatype->lb; @@ -138,64 +175,111 @@ MPI_Aint smpi_datatype_ub(MPI_Datatype datatype) return datatype->ub; } +int smpi_datatype_dup(MPI_Datatype datatype, MPI_Datatype* new_t) +{ + int ret=MPI_SUCCESS; + *new_t= xbt_new(s_smpi_mpi_datatype_t,1); + memcpy(*new_t, datatype, sizeof(s_smpi_mpi_datatype_t)); + if (datatype->has_subtype){ + //FIXME: may copy too much information. + (*new_t)->substruct=xbt_malloc(sizeof(s_smpi_mpi_struct_t)); + memcpy((*new_t)->substruct, datatype->substruct, sizeof(s_smpi_mpi_struct_t)); + } + if(datatype->name) + (*new_t)->name = strdup(datatype->name); + if(datatype->attributes !=NULL){ + (*new_t)->attributes=xbt_dict_new(); + xbt_dict_cursor_t cursor = NULL; + int *key; + int flag; + void* value_in; + void* value_out; + xbt_dict_foreach(datatype->attributes, cursor, key, value_in){ + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)key); + if(elem && elem->copy_fn!=MPI_NULL_COPY_FN){ + ret = elem->copy_fn(datatype, atoi((const char*)key), NULL, value_in, &value_out, &flag ); + if(ret!=MPI_SUCCESS){ + *new_t=MPI_DATATYPE_NULL; + return ret; + } + if(flag) + xbt_dict_set((*new_t)->attributes, (const char*)key,value_out, NULL); + } + } + } + return ret; +} + int smpi_datatype_extent(MPI_Datatype datatype, MPI_Aint * lb, MPI_Aint * extent) { - int retval; + if(datatype == MPI_DATATYPE_NULL){ + *lb=0; + *extent=0; + return MPI_SUCCESS; + } + *lb = datatype->lb; + *extent = datatype->ub - datatype->lb; + return MPI_SUCCESS; +} - if ((datatype->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { - *lb = datatype->lb; - *extent = datatype->ub - datatype->lb; - retval = MPI_SUCCESS; +MPI_Aint smpi_datatype_get_extent(MPI_Datatype datatype){ + if(datatype == MPI_DATATYPE_NULL){ + return 0; } - return retval; + return datatype->ub - datatype->lb; +} + +void smpi_datatype_get_name(MPI_Datatype datatype, char* name, int* length){ + *length = strlen(datatype->name); + strcpy(name, datatype->name); +} + +void smpi_datatype_set_name(MPI_Datatype datatype, char* name){ + datatype->name = strdup(name);; } int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype) { - int retval, count; - + int count; + if(smpi_privatize_global_variables){ + smpi_switch_data_segment(smpi_process_index()); + } /* First check if we really have something to do */ - if (recvcount == 0) { - retval = sendcount == 0 ? MPI_SUCCESS : MPI_ERR_TRUNCATE; - } else { + if (recvcount > 0 && recvbuf != sendbuf) { /* FIXME: treat packed cases */ sendcount *= smpi_datatype_size(sendtype); recvcount *= smpi_datatype_size(recvtype); count = sendcount < recvcount ? sendcount : recvcount; if(sendtype->has_subtype == 0 && recvtype->has_subtype == 0) { - memcpy(recvbuf, sendbuf, count); + if(!smpi_process_get_replaying()) memcpy(recvbuf, sendbuf, count); } else if (sendtype->has_subtype == 0) { s_smpi_subtype_t *subtype = recvtype->substruct; - subtype->unserialize( sendbuf, recvbuf,1, subtype); + subtype->unserialize( sendbuf, recvbuf, recvcount/smpi_datatype_size(recvtype), subtype, MPI_REPLACE); } else if (recvtype->has_subtype == 0) { s_smpi_subtype_t *subtype = sendtype->substruct; - subtype->serialize(sendbuf, recvbuf,1, subtype); + subtype->serialize(sendbuf, recvbuf, sendcount/smpi_datatype_size(sendtype), subtype); }else{ s_smpi_subtype_t *subtype = sendtype->substruct; - s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)sendtype; - void * buf_tmp = malloc(count * type_c->size_oldtype); + void * buf_tmp = xbt_malloc(count); - subtype->serialize( sendbuf, buf_tmp,1, subtype); + subtype->serialize( sendbuf, buf_tmp,count/smpi_datatype_size(sendtype), subtype); subtype = recvtype->substruct; - subtype->unserialize(recvbuf, buf_tmp,1, subtype); + subtype->unserialize( buf_tmp, recvbuf,count/smpi_datatype_size(recvtype), subtype, MPI_REPLACE); free(buf_tmp); } - retval = sendcount > recvcount ? MPI_ERR_TRUNCATE : MPI_SUCCESS; } - return retval; + return sendcount > recvcount ? MPI_ERR_TRUNCATE : MPI_SUCCESS; } /* @@ -209,7 +293,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, */ void serialize_vector( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type) { s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type; @@ -218,11 +302,20 @@ void serialize_vector( const void *noncontiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_vector_char, - noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_vector_char, + noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_vector_char, + contiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; - noncontiguous_vector_char += type_c->block_stride*type_c->size_oldtype; + if((i+1)%type_c->block_count ==0) + noncontiguous_vector_char += type_c->block_length*smpi_datatype_get_extent(type_c->old_type); + else + noncontiguous_vector_char += type_c->block_stride*smpi_datatype_get_extent(type_c->old_type); } } @@ -237,8 +330,9 @@ void serialize_vector( const void *noncontiguous_vector, */ void unserialize_vector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type; int i; @@ -247,11 +341,22 @@ void unserialize_vector( const void *contiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_vector_char, - contiguous_vector_char, type_c->block_length * type_c->size_oldtype); - + if (type_c->old_type->has_subtype == 0) + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, + &type_c->old_type); + /* memcpy(noncontiguous_vector_char, + contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/ + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, + noncontiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct, + op); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; - noncontiguous_vector_char += type_c->block_stride*type_c->size_oldtype; + if((i+1)%type_c->block_count ==0) + noncontiguous_vector_char += type_c->block_length*smpi_datatype_get_extent(type_c->old_type); + else + noncontiguous_vector_char += type_c->block_stride*smpi_datatype_get_extent(type_c->old_type); } } @@ -273,36 +378,183 @@ s_smpi_mpi_vector_t* smpi_datatype_vector_create( int block_stride, new_t->block_stride = block_stride; new_t->block_length = block_length; new_t->block_count = block_count; + smpi_datatype_use(old_type); new_t->old_type = old_type; new_t->size_oldtype = size_oldtype; return new_t; } -void smpi_datatype_create(MPI_Datatype* new_type, int size,int extent, int has_subtype, +void smpi_datatype_create(MPI_Datatype* new_type, int size,int lb, int ub, int has_subtype, void *struct_type, int flags){ MPI_Datatype new_t= xbt_new(s_smpi_mpi_datatype_t,1); + new_t->name = NULL; new_t->size = size; - new_t->has_subtype = has_subtype; - new_t->lb = 0; - new_t->ub = extent; + new_t->has_subtype = size>0? has_subtype:0; + new_t->lb = lb; + new_t->ub = ub; new_t->flags = flags; new_t->substruct = struct_type; + new_t->in_use=0; + new_t->attributes=NULL; *new_type = new_t; + +#ifdef HAVE_MC + if(MC_is_active()) + MC_ignore(&(new_t->in_use), sizeof(new_t->in_use)); +#endif } void smpi_datatype_free(MPI_Datatype* type){ + if((*type)->attributes !=NULL){ + xbt_dict_cursor_t cursor = NULL; + int* key; + void * value; + int flag; + xbt_dict_foreach((*type)->attributes, cursor, key, value){ + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)key); + if(elem && elem->delete_fn) + elem->delete_fn(*type, atoi((const char*)key), value, &flag); + } + } + + if((*type)->flags & DT_FLAG_PREDEFINED)return; + + //if still used, mark for deletion + if((*type)->in_use!=0){ + (*type)->flags |=DT_FLAG_DESTROYED; + return; + } + if ((*type)->has_subtype == 1){ ((s_smpi_subtype_t *)(*type)->substruct)->subtype_free(type); + xbt_free((*type)->substruct); + } + if ((*type)->name != NULL){ + xbt_free((*type)->name); } xbt_free(*type); + *type = MPI_DATATYPE_NULL; +} + +void smpi_datatype_use(MPI_Datatype type){ + if(type)type->in_use++; + +#ifdef HAVE_MC + if(MC_is_active()) + MC_ignore(&(type->in_use), sizeof(type->in_use)); +#endif +} + + +void smpi_datatype_unuse(MPI_Datatype type){ + if(type && type->in_use-- == 0 && (type->flags & DT_FLAG_DESTROYED)) + smpi_datatype_free(&type); + +#ifdef HAVE_MC + if(MC_is_active()) + MC_ignore(&(type->in_use), sizeof(type->in_use)); +#endif +} + + + + +/* +Contiguous Implementation +*/ + + +/* + * Copies noncontiguous data into contiguous memory. + * @param contiguous_hvector - output hvector + * @param noncontiguous_hvector - input hvector + * @param type - pointer contening : + * - stride - stride of between noncontiguous data, in bytes + * - block_length - the width or height of blocked matrix + * - count - the number of rows of matrix + */ +void serialize_contiguous( const void *noncontiguous_hvector, + void *contiguous_hvector, + int count, + void *type) +{ + s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type; + char* contiguous_vector_char = (char*)contiguous_hvector; + char* noncontiguous_vector_char = (char*)noncontiguous_hvector+type_c->lb; + memcpy(contiguous_vector_char, + noncontiguous_vector_char, count* type_c->block_count * type_c->size_oldtype); +} +/* + * Copies contiguous data into noncontiguous memory. + * @param noncontiguous_vector - output hvector + * @param contiguous_vector - input hvector + * @param type - pointer contening : + * - stride - stride of between noncontiguous data, in bytes + * - block_length - the width or height of blocked matrix + * - count - the number of rows of matrix + */ +void unserialize_contiguous( const void *contiguous_vector, + void *noncontiguous_vector, + int count, + void *type, + MPI_Op op) +{ + s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type; + char* contiguous_vector_char = (char*)contiguous_vector; + char* noncontiguous_vector_char = (char*)noncontiguous_vector+type_c->lb; + int n= count* type_c->block_count; + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &n, + &type_c->old_type); + /*memcpy(noncontiguous_vector_char, + contiguous_vector_char, count* type_c->block_count * type_c->size_oldtype);*/ +} + +void free_contiguous(MPI_Datatype* d){ + smpi_datatype_unuse(((s_smpi_mpi_indexed_t *)(*d)->substruct)->old_type); +} + +/* + * Create a Sub type contiguous to be able to serialize and unserialize it + * the structure s_smpi_mpi_contiguous_t is derived from s_smpi_subtype which + * required the functions unserialize and serialize + * + */ +s_smpi_mpi_contiguous_t* smpi_datatype_contiguous_create( MPI_Aint lb, + int block_count, + MPI_Datatype old_type, + int size_oldtype){ + s_smpi_mpi_contiguous_t *new_t= xbt_new(s_smpi_mpi_contiguous_t,1); + new_t->base.serialize = &serialize_contiguous; + new_t->base.unserialize = &unserialize_contiguous; + new_t->base.subtype_free = &free_contiguous; + new_t->lb = lb; + new_t->block_count = block_count; + new_t->old_type = old_type; + new_t->size_oldtype = size_oldtype; + smpi_datatype_use(old_type); + return new_t; } -int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_type) + + + +int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_type, MPI_Aint lb) { int retval; - smpi_datatype_create(new_type, count * - smpi_datatype_size(old_type),count * - smpi_datatype_size(old_type),0,NULL, DT_FLAG_CONTIGUOUS); + if(old_type->has_subtype){ + //handle this case as a hvector with stride equals to the extent of the datatype + return smpi_datatype_hvector(count, 1, smpi_datatype_get_extent(old_type), old_type, new_type); + } + + s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb, + count, + old_type, + smpi_datatype_size(old_type)); + + smpi_datatype_create(new_type, + count * smpi_datatype_size(old_type), + lb,lb + count * smpi_datatype_size(old_type), + 1,subtype, DT_FLAG_CONTIGUOUS); retval=MPI_SUCCESS; return retval; } @@ -310,18 +562,24 @@ int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new int smpi_datatype_vector(int count, int blocklen, int stride, MPI_Datatype old_type, MPI_Datatype* new_type) { int retval; - if (blocklen<=0) return MPI_ERR_ARG; - if(stride != blocklen){ - if (old_type->has_subtype == 1) - XBT_WARN("vector contains a complex type - not yet handled"); + if (blocklen<0) return MPI_ERR_ARG; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=smpi_datatype_lb(old_type); + ub=((count-1)*stride+blocklen-1)*smpi_datatype_get_extent(old_type)+smpi_datatype_ub(old_type); + } + if(old_type->has_subtype || stride != blocklen){ + + s_smpi_mpi_vector_t* subtype = smpi_datatype_vector_create( stride, blocklen, count, old_type, smpi_datatype_size(old_type)); - smpi_datatype_create(new_type, count * (blocklen) * - smpi_datatype_size(old_type), - ((count -1) * stride + blocklen) * smpi_datatype_size(old_type), + smpi_datatype_create(new_type, + count * (blocklen) * smpi_datatype_size(old_type), lb, + ub, 1, subtype, DT_FLAG_VECTOR); @@ -330,7 +588,7 @@ int smpi_datatype_vector(int count, int blocklen, int stride, MPI_Datatype old_t /* in this situation the data are contignous thus it's not * required to serialize and unserialize it*/ smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type), ((count -1) * stride + blocklen)* + smpi_datatype_size(old_type), 0, ((count -1) * stride + blocklen)* smpi_datatype_size(old_type), 0, NULL, @@ -341,6 +599,7 @@ int smpi_datatype_vector(int count, int blocklen, int stride, MPI_Datatype old_t } void free_vector(MPI_Datatype* d){ + smpi_datatype_unuse(((s_smpi_mpi_indexed_t *)(*d)->substruct)->old_type); } /* @@ -359,7 +618,7 @@ Hvector Implementation - Vector with stride in bytes */ void serialize_hvector( const void *noncontiguous_hvector, void *contiguous_hvector, - size_t count, + int count, void *type) { s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type; @@ -368,10 +627,19 @@ void serialize_hvector( const void *noncontiguous_hvector, char* noncontiguous_vector_char = (char*)noncontiguous_hvector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_vector_char, + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_vector_char, noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_vector_char, + contiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; + if((i+1)%type_c->block_count ==0) + noncontiguous_vector_char += type_c->block_length*type_c->size_oldtype; + else noncontiguous_vector_char += type_c->block_stride; } } @@ -386,8 +654,9 @@ void serialize_hvector( const void *noncontiguous_hvector, */ void unserialize_hvector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type; int i; @@ -396,10 +665,21 @@ void unserialize_hvector( const void *contiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_vector_char, - contiguous_vector_char, type_c->block_length * type_c->size_oldtype); - + if (type_c->old_type->has_subtype == 0) + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, + &type_c->old_type); + /*memcpy(noncontiguous_vector_char, + contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/ + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, + noncontiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct, + op); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; + if((i+1)%type_c->block_count ==0) + noncontiguous_vector_char += type_c->block_length*type_c->size_oldtype; + else noncontiguous_vector_char += type_c->block_stride; } } @@ -424,36 +704,41 @@ s_smpi_mpi_hvector_t* smpi_datatype_hvector_create( MPI_Aint block_stride, new_t->block_count = block_count; new_t->old_type = old_type; new_t->size_oldtype = size_oldtype; + smpi_datatype_use(old_type); return new_t; } //do nothing for vector types void free_hvector(MPI_Datatype* d){ + smpi_datatype_unuse(((s_smpi_mpi_indexed_t *)(*d)->substruct)->old_type); } int smpi_datatype_hvector(int count, int blocklen, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype* new_type) { int retval; - if (blocklen<=0) return MPI_ERR_ARG; - if (old_type->has_subtype == 1) - XBT_WARN("hvector contains a complex type - not yet handled"); - if(stride != blocklen*smpi_datatype_size(old_type)){ + if (blocklen<0) return MPI_ERR_ARG; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=smpi_datatype_lb(old_type); + ub=((count-1)*stride)+(blocklen-1)*smpi_datatype_get_extent(old_type)+smpi_datatype_ub(old_type); + } + if(old_type->has_subtype || stride != blocklen*smpi_datatype_get_extent(old_type)){ s_smpi_mpi_hvector_t* subtype = smpi_datatype_hvector_create( stride, blocklen, count, old_type, smpi_datatype_size(old_type)); - smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type), (count-1) * stride + blocklen * - smpi_datatype_size(old_type), + smpi_datatype_create(new_type, count * blocklen * smpi_datatype_size(old_type), + lb,ub, 1, subtype, DT_FLAG_VECTOR); retval=MPI_SUCCESS; }else{ smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type),count * blocklen * + smpi_datatype_size(old_type),0,count * blocklen * smpi_datatype_size(old_type), 0, NULL, @@ -479,21 +764,28 @@ Indexed Implementation */ void serialize_indexed( const void *noncontiguous_indexed, void *contiguous_indexed, - size_t count, + int count, void *type) { s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; int i,j; char* contiguous_indexed_char = (char*)contiguous_indexed; - char* noncontiguous_indexed_char = (char*)noncontiguous_indexed; + char* noncontiguous_indexed_char = (char*)noncontiguous_indexed+type_c->block_indices[0] * type_c->size_oldtype; for(j=0; jblock_count; i++) { - memcpy(contiguous_indexed_char, - noncontiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_indexed_char, + noncontiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_indexed_char, + contiguous_indexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); + contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - if (iblock_count-1)noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*type_c->size_oldtype; - else noncontiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1)noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*smpi_datatype_get_extent(type_c->old_type); + else noncontiguous_indexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); } noncontiguous_indexed=(void*)noncontiguous_indexed_char; } @@ -509,22 +801,33 @@ void serialize_indexed( const void *noncontiguous_indexed, */ void unserialize_indexed( const void *contiguous_indexed, void *noncontiguous_indexed, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { + s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; int i,j; - char* contiguous_indexed_char = (char*)contiguous_indexed; - char* noncontiguous_indexed_char = (char*)noncontiguous_indexed; + char* noncontiguous_indexed_char = (char*)noncontiguous_indexed+type_c->block_indices[0]*smpi_datatype_get_extent(type_c->old_type); for(j=0; jblock_count; i++) { - memcpy(noncontiguous_indexed_char, - contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + smpi_op_apply(op, contiguous_indexed_char, noncontiguous_indexed_char, &type_c->block_lengths[i], + &type_c->old_type); + /*memcpy(noncontiguous_indexed_char , + contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/ + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char, + noncontiguous_indexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct, + op); contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - if (iblock_count-1)noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*type_c->size_oldtype; - else noncontiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1) + noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*smpi_datatype_get_extent(type_c->old_type); + else noncontiguous_indexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); } noncontiguous_indexed=(void*)noncontiguous_indexed_char; } @@ -533,6 +836,7 @@ void unserialize_indexed( const void *contiguous_indexed, void free_indexed(MPI_Datatype* type){ xbt_free(((s_smpi_mpi_indexed_t *)(*type)->substruct)->block_lengths); xbt_free(((s_smpi_mpi_indexed_t *)(*type)->substruct)->block_indices); + smpi_datatype_unuse(((s_smpi_mpi_indexed_t *)(*type)->substruct)->old_type); } /* @@ -558,6 +862,7 @@ s_smpi_mpi_indexed_t* smpi_datatype_indexed_create( int* block_lengths, new_t->block_indices[i]=block_indices[i]; } new_t->block_count = block_count; + smpi_datatype_use(old_type); new_t->old_type = old_type; new_t->size_oldtype = size_oldtype; return new_t; @@ -570,15 +875,27 @@ int smpi_datatype_indexed(int count, int* blocklens, int* indices, MPI_Datatype int retval; int size = 0; int contiguous=1; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0]*smpi_datatype_get_extent(old_type); + ub=indices[0]*smpi_datatype_get_extent(old_type) + blocklens[0]*smpi_datatype_ub(old_type); + } + for(i=0; i< count; i++){ - if (blocklens[i]<=0) + if (blocklens[i]<0) return MPI_ERR_ARG; size += blocklens[i]; + if(indices[i]*smpi_datatype_get_extent(old_type)+smpi_datatype_lb(old_type)ub) + ub = indices[i]*smpi_datatype_get_extent(old_type)+blocklens[i]*smpi_datatype_ub(old_type); + if ( (i< count -1) && (indices[i]+blocklens[i] != indices[i+1]) )contiguous=0; } if (old_type->has_subtype == 1) - XBT_WARN("indexed contains a complex type - not yet handled"); + contiguous=0; if(!contiguous){ s_smpi_mpi_indexed_t* subtype = smpi_datatype_indexed_create( blocklens, @@ -587,11 +904,14 @@ int smpi_datatype_indexed(int count, int* blocklens, int* indices, MPI_Datatype old_type, smpi_datatype_size(old_type)); smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),(indices[count-1]+blocklens[count-1])*smpi_datatype_size(old_type),1, subtype, DT_FLAG_DATA); + smpi_datatype_size(old_type),lb,ub,1, subtype, DT_FLAG_DATA); }else{ + s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb, + size, + old_type, + smpi_datatype_size(old_type)); smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),size * - smpi_datatype_size(old_type),0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); + smpi_datatype_size(old_type),lb,ub,1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); } retval=MPI_SUCCESS; return retval; @@ -613,21 +933,27 @@ Hindexed Implementation - Indexed with indices in bytes */ void serialize_hindexed( const void *noncontiguous_hindexed, void *contiguous_hindexed, - size_t count, + int count, void *type) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; int i,j; char* contiguous_hindexed_char = (char*)contiguous_hindexed; - char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed; + char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed+ type_c->block_indices[0]; for(j=0; jblock_count; i++) { - memcpy(contiguous_hindexed_char, - noncontiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_hindexed_char, + noncontiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_hindexed_char, + contiguous_hindexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; if (iblock_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; - else noncontiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + else noncontiguous_hindexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); } noncontiguous_hindexed=(void*)noncontiguous_hindexed_char; } @@ -643,22 +969,32 @@ void serialize_hindexed( const void *noncontiguous_hindexed, */ void unserialize_hindexed( const void *contiguous_hindexed, void *noncontiguous_hindexed, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; int i,j; char* contiguous_hindexed_char = (char*)contiguous_hindexed; - char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed; + char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed+ type_c->block_indices[0]; for(j=0; jblock_count; i++) { - memcpy(noncontiguous_hindexed_char, - contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + smpi_op_apply(op, contiguous_hindexed_char, noncontiguous_hindexed_char, &type_c->block_lengths[i], + &type_c->old_type); + /*memcpy(noncontiguous_hindexed_char, + contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/ + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_hindexed_char, + noncontiguous_hindexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct, + op); contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; if (iblock_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; - else noncontiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + else noncontiguous_hindexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); } noncontiguous_hindexed=(void*)noncontiguous_hindexed_char; } @@ -667,6 +1003,7 @@ void unserialize_hindexed( const void *contiguous_hindexed, void free_hindexed(MPI_Datatype* type){ xbt_free(((s_smpi_mpi_hindexed_t *)(*type)->substruct)->block_lengths); xbt_free(((s_smpi_mpi_hindexed_t *)(*type)->substruct)->block_indices); + smpi_datatype_unuse(((s_smpi_mpi_indexed_t *)(*type)->substruct)->old_type); } /* @@ -704,27 +1041,43 @@ int smpi_datatype_hindexed(int count, int* blocklens, MPI_Aint* indices, MPI_Dat int retval; int size = 0; int contiguous=1; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0] + smpi_datatype_lb(old_type); + ub=indices[0] + blocklens[0]*smpi_datatype_ub(old_type); + } for(i=0; i< count; i++){ - if (blocklens[i]<=0) + if (blocklens[i]<0) return MPI_ERR_ARG; size += blocklens[i]; + + if(indices[i]+smpi_datatype_lb(old_type)ub) ub = indices[i]+blocklens[i]*smpi_datatype_ub(old_type); + if ( (i< count -1) && (indices[i]+blocklens[i]*smpi_datatype_size(old_type) != indices[i+1]) )contiguous=0; } - if (old_type->has_subtype == 1) - XBT_WARN("hindexed contains a complex type - not yet handled"); + if (old_type->has_subtype == 1 || lb!=0) + contiguous=0; + if(!contiguous){ s_smpi_mpi_hindexed_t* subtype = smpi_datatype_hindexed_create( blocklens, indices, count, old_type, smpi_datatype_size(old_type)); - smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),indices[count-1]+blocklens[count-1]*smpi_datatype_size(old_type) + smpi_datatype_create(new_type, size * smpi_datatype_size(old_type), + lb, + ub ,1, subtype, DT_FLAG_DATA); }else{ - smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),size * - smpi_datatype_size(old_type),0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); + s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb, + size, + old_type, + smpi_datatype_size(old_type)); + smpi_datatype_create(new_type, size * smpi_datatype_size(old_type), + 0,size * smpi_datatype_size(old_type), + 1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); } retval=MPI_SUCCESS; return retval; @@ -746,20 +1099,28 @@ struct Implementation - Indexed with indices in bytes */ void serialize_struct( const void *noncontiguous_struct, void *contiguous_struct, - size_t count, + int count, void *type) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; int i,j; char* contiguous_struct_char = (char*)contiguous_struct; - char* noncontiguous_struct_char = (char*)noncontiguous_struct; + char* noncontiguous_struct_char = (char*)noncontiguous_struct+ type_c->block_indices[0]; for(j=0; jblock_count; i++) { - memcpy(contiguous_struct_char, + if (type_c->old_types[i]->has_subtype == 0) + memcpy(contiguous_struct_char, noncontiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); + else + ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->serialize( noncontiguous_struct_char, + contiguous_struct_char, + type_c->block_lengths[i], + type_c->old_types[i]->substruct); + + contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); if (iblock_count-1)noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; - else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]);//let's hope this is MPI_UB ? + else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_types[i]);//let's hope this is MPI_UB ? } noncontiguous_struct=(void*)noncontiguous_struct_char; } @@ -775,21 +1136,32 @@ void serialize_struct( const void *noncontiguous_struct, */ void unserialize_struct( const void *contiguous_struct, void *noncontiguous_struct, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; int i,j; char* contiguous_struct_char = (char*)contiguous_struct; - char* noncontiguous_struct_char = (char*)noncontiguous_struct; + char* noncontiguous_struct_char = (char*)noncontiguous_struct+ type_c->block_indices[0]; for(j=0; jblock_count; i++) { - memcpy(noncontiguous_struct_char, - contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); + if (type_c->old_types[i]->has_subtype == 0) + smpi_op_apply(op, contiguous_struct_char, noncontiguous_struct_char, &type_c->block_lengths[i], + & type_c->old_types[i]); + /*memcpy(noncontiguous_struct_char, + contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i]));*/ + else + ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char, + noncontiguous_struct_char, + type_c->block_lengths[i], + type_c->old_types[i]->substruct, + op); + contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); if (iblock_count-1)noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; - else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); + else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_types[i]); } noncontiguous_struct=(void*)noncontiguous_struct_char; @@ -799,6 +1171,9 @@ void unserialize_struct( const void *contiguous_struct, void free_struct(MPI_Datatype* type){ xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->block_lengths); xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->block_indices); + int i=0; + for (i = 0; i < ((s_smpi_mpi_struct_t *)(*type)->substruct)->block_count; i++) + smpi_datatype_unuse(((s_smpi_mpi_struct_t *)(*type)->substruct)->old_types[i]); xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->old_types); } @@ -824,6 +1199,7 @@ s_smpi_mpi_struct_t* smpi_datatype_struct_create( int* block_lengths, new_t->block_lengths[i]=block_lengths[i]; new_t->block_indices[i]=block_indices[i]; new_t->old_types[i]=old_types[i]; + smpi_datatype_use(new_t->old_types[i]); } //new_t->block_lengths = block_lengths; //new_t->block_indices = block_indices; @@ -839,12 +1215,32 @@ int smpi_datatype_struct(int count, int* blocklens, MPI_Aint* indices, MPI_Datat size_t size = 0; int contiguous=1; size = 0; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0] + smpi_datatype_lb(old_types[0]); + ub=indices[0] + blocklens[0]*smpi_datatype_ub(old_types[0]); + } + int forced_lb=0; + int forced_ub=0; for(i=0; i< count; i++){ - if (blocklens[i]<=0) + if (blocklens[i]<0) return MPI_ERR_ARG; if (old_types[i]->has_subtype == 1) - XBT_WARN("Struct contains a complex type - not yet handled"); + contiguous=0; + size += blocklens[i]*smpi_datatype_size(old_types[i]); + if (old_types[i]==MPI_LB){ + lb=indices[i]; + forced_lb=1; + } + if (old_types[i]==MPI_UB){ + ub=indices[i]; + forced_ub=1; + } + + if(!forced_lb && indices[i]+smpi_datatype_lb(old_types[i])ub) ub = indices[i]+blocklens[i]*smpi_datatype_ub(old_types[i]); if ( (i< count -1) && (indices[i]+blocklens[i]*smpi_datatype_size(old_types[i]) != indices[i+1]) )contiguous=0; } @@ -855,9 +1251,13 @@ int smpi_datatype_struct(int count, int* blocklens, MPI_Aint* indices, MPI_Datat count, old_types); - smpi_datatype_create(new_type, size, indices[count-1] + blocklens[count-1]*smpi_datatype_size(old_types[count-1]),1, subtype, DT_FLAG_DATA); + smpi_datatype_create(new_type, size, lb, ub,1, subtype, DT_FLAG_DATA); }else{ - smpi_datatype_create(new_type, size, indices[count-1] + blocklens[count-1]*smpi_datatype_size(old_types[count-1]),0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); + s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb, + size, + MPI_CHAR, + 1); + smpi_datatype_create(new_type, size, lb, ub,1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); } return MPI_SUCCESS; } @@ -869,6 +1269,7 @@ void smpi_datatype_commit(MPI_Datatype *datatype) typedef struct s_smpi_mpi_op { MPI_User_function *func; + int is_commute; } s_smpi_mpi_op_t; #define MAX_OP(a, b) (b) = (a) < (b) ? (b) : (a) @@ -883,7 +1284,6 @@ typedef struct s_smpi_mpi_op { #define BXOR_OP(a, b) (b) ^= (a) #define MAXLOC_OP(a, b) (b) = (a.value) < (b.value) ? (b) : (a) #define MINLOC_OP(a, b) (b) = (a.value) < (b.value) ? (a) : (b) -//TODO : MINLOC & MAXLOC #define APPLY_FUNC(a, b, length, type, func) \ { \ @@ -912,6 +1312,8 @@ static void max_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, MAX_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, MAX_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, MAX_OP); } else if (*datatype == MPI_FLOAT) { APPLY_FUNC(a, b, length, float, MAX_OP); } else if (*datatype == MPI_DOUBLE) { @@ -938,6 +1340,8 @@ static void min_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, MIN_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, MIN_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, MIN_OP); } else if (*datatype == MPI_FLOAT) { APPLY_FUNC(a, b, length, float, MIN_OP); } else if (*datatype == MPI_DOUBLE) { @@ -964,6 +1368,8 @@ static void sum_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, SUM_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, SUM_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, SUM_OP); } else if (*datatype == MPI_FLOAT) { APPLY_FUNC(a, b, length, float, SUM_OP); } else if (*datatype == MPI_DOUBLE) { @@ -996,6 +1402,8 @@ static void prod_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, PROD_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, PROD_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, PROD_OP); } else if (*datatype == MPI_FLOAT) { APPLY_FUNC(a, b, length, float, PROD_OP); } else if (*datatype == MPI_DOUBLE) { @@ -1028,6 +1436,8 @@ static void land_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, LAND_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, LAND_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, LAND_OP); } else if (*datatype == MPI_C_BOOL) { APPLY_FUNC(a, b, length, _Bool, LAND_OP); } @@ -1050,6 +1460,8 @@ static void lor_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, LOR_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, LOR_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, LOR_OP); } else if (*datatype == MPI_C_BOOL) { APPLY_FUNC(a, b, length, _Bool, LOR_OP); } @@ -1072,6 +1484,8 @@ static void lxor_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, LXOR_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, LXOR_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, LXOR_OP); } else if (*datatype == MPI_C_BOOL) { APPLY_FUNC(a, b, length, _Bool, LXOR_OP); } @@ -1082,8 +1496,7 @@ static void band_func(void *a, void *b, int *length, { if (*datatype == MPI_CHAR) { APPLY_FUNC(a, b, length, char, BAND_OP); - } - if (*datatype == MPI_SHORT) { + }else if (*datatype == MPI_SHORT) { APPLY_FUNC(a, b, length, short, BAND_OP); } else if (*datatype == MPI_INT) { APPLY_FUNC(a, b, length, int, BAND_OP); @@ -1095,6 +1508,8 @@ static void band_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, BAND_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, BAND_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, BAND_OP); } else if (*datatype == MPI_BYTE) { APPLY_FUNC(a, b, length, uint8_t, BAND_OP); } @@ -1117,6 +1532,8 @@ static void bor_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, BOR_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, BOR_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, BOR_OP); } else if (*datatype == MPI_BYTE) { APPLY_FUNC(a, b, length, uint8_t, BOR_OP); } @@ -1139,6 +1556,8 @@ static void bxor_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, unsigned int, BXOR_OP); } else if (*datatype == MPI_UNSIGNED_LONG) { APPLY_FUNC(a, b, length, unsigned long, BXOR_OP); + } else if (*datatype == MPI_UNSIGNED_CHAR) { + APPLY_FUNC(a, b, length, unsigned char, BXOR_OP); } else if (*datatype == MPI_BYTE) { APPLY_FUNC(a, b, length, uint8_t, BXOR_OP); } @@ -1155,6 +1574,8 @@ static void minloc_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, double_int, MINLOC_OP); } else if (*datatype == MPI_SHORT_INT) { APPLY_FUNC(a, b, length, short_int, MINLOC_OP); + } else if (*datatype == MPI_2LONG) { + APPLY_FUNC(a, b, length, long_long, MINLOC_OP); } else if (*datatype == MPI_2INT) { APPLY_FUNC(a, b, length, int_int, MINLOC_OP); } else if (*datatype == MPI_LONG_DOUBLE_INT) { @@ -1177,6 +1598,8 @@ static void maxloc_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, double_int, MAXLOC_OP); } else if (*datatype == MPI_SHORT_INT) { APPLY_FUNC(a, b, length, short_int, MAXLOC_OP); + } else if (*datatype == MPI_2LONG) { + APPLY_FUNC(a, b, length, long_long, MAXLOC_OP); } else if (*datatype == MPI_2INT) { APPLY_FUNC(a, b, length, int_int, MAXLOC_OP); } else if (*datatype == MPI_LONG_DOUBLE_INT) { @@ -1188,9 +1611,14 @@ static void maxloc_func(void *a, void *b, int *length, } } +static void replace_func(void *a, void *b, int *length, + MPI_Datatype * datatype) +{ + memcpy(b, a, *length * smpi_datatype_size(*datatype)); +} #define CREATE_MPI_OP(name, func) \ - static s_smpi_mpi_op_t mpi_##name = { &(func) /* func */ }; \ + static s_smpi_mpi_op_t mpi_##name = { &(func) /* func */, TRUE }; \ MPI_Op name = &mpi_##name; CREATE_MPI_OP(MPI_MAX, max_func); @@ -1205,17 +1633,23 @@ CREATE_MPI_OP(MPI_BOR, bor_func); CREATE_MPI_OP(MPI_BXOR, bxor_func); CREATE_MPI_OP(MPI_MAXLOC, maxloc_func); CREATE_MPI_OP(MPI_MINLOC, minloc_func); +CREATE_MPI_OP(MPI_REPLACE, replace_func); + MPI_Op smpi_op_new(MPI_User_function * function, int commute) { MPI_Op op; - - //FIXME: add commute param op = xbt_new(s_smpi_mpi_op_t, 1); op->func = function; + op-> is_commute = commute; return op; } +int smpi_op_is_commute(MPI_Op op) +{ + return (op==MPI_OP_NULL) ? 1 : op-> is_commute; +} + void smpi_op_destroy(MPI_Op op) { xbt_free(op); @@ -1224,5 +1658,136 @@ void smpi_op_destroy(MPI_Op op) void smpi_op_apply(MPI_Op op, void *invec, void *inoutvec, int *len, MPI_Datatype * datatype) { + if(op==MPI_OP_NULL) + return; + + if(smpi_privatize_global_variables){ //we need to switch here, as the called function may silently touch global variables + XBT_DEBUG("Applying operation, switch to the right data frame "); + smpi_switch_data_segment(smpi_process_index()); + } + + if(!smpi_process_get_replaying()) op->func(invec, inoutvec, len, datatype); } + +int smpi_type_attr_delete(MPI_Datatype type, int keyval){ + char* tmpkey=xbt_malloc(INTSIZEDCHAR); + sprintf(tmpkey, "%d", keyval); + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)tmpkey); + if(!elem) + return MPI_ERR_ARG; + if(elem->delete_fn!=MPI_NULL_DELETE_FN){ + void * value; + int flag; + if(smpi_type_attr_get(type, keyval, &value, &flag)==MPI_SUCCESS){ + int ret = elem->delete_fn(type, keyval, value, &flag); + if(ret!=MPI_SUCCESS) return ret; + } + } + if(type->attributes==NULL) + return MPI_ERR_ARG; + + xbt_dict_remove(type->attributes, (const char*)tmpkey); + xbt_free(tmpkey); + return MPI_SUCCESS; +} + +int smpi_type_attr_get(MPI_Datatype type, int keyval, void* attr_value, int* flag){ + char* tmpkey=xbt_malloc(INTSIZEDCHAR); + sprintf(tmpkey, "%d", keyval); + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)tmpkey); + if(!elem) + return MPI_ERR_ARG; + xbt_ex_t ex; + if(type->attributes==NULL){ + *flag=0; + return MPI_SUCCESS; + } + TRY { + *(void**)attr_value = xbt_dict_get(type->attributes, (const char*)tmpkey); + *flag=1; + } + CATCH(ex) { + *flag=0; + xbt_ex_free(ex); + } + xbt_free(tmpkey); + return MPI_SUCCESS; +} + +int smpi_type_attr_put(MPI_Datatype type, int keyval, void* attr_value){ + if(!smpi_type_keyvals) + smpi_type_keyvals = xbt_dict_new(); + char* tmpkey=xbt_malloc(INTSIZEDCHAR); + sprintf(tmpkey, "%d", keyval); + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)tmpkey); + if(!elem ) + return MPI_ERR_ARG; + int flag; + void* value; + smpi_type_attr_get(type, keyval, &value, &flag); + if(flag && elem->delete_fn!=MPI_NULL_DELETE_FN){ + int ret = elem->delete_fn(type, keyval, value, &flag); + if(ret!=MPI_SUCCESS) return ret; + } + if(type->attributes==NULL) + type->attributes=xbt_dict_new(); + + xbt_dict_set(type->attributes, (const char*)tmpkey, attr_value, NULL); + xbt_free(tmpkey); + return MPI_SUCCESS; +} + +int smpi_type_keyval_create(MPI_Type_copy_attr_function* copy_fn, MPI_Type_delete_attr_function* delete_fn, int* keyval, void* extra_state){ + + if(!smpi_type_keyvals) + smpi_type_keyvals = xbt_dict_new(); + + smpi_type_key_elem value = (smpi_type_key_elem) xbt_new0(s_smpi_mpi_type_key_elem_t,1); + + value->copy_fn=copy_fn; + value->delete_fn=delete_fn; + + *keyval = type_keyval_id; + char* tmpkey=xbt_malloc(INTSIZEDCHAR); + sprintf(tmpkey, "%d", *keyval); + xbt_dict_set(smpi_type_keyvals,(const char*)tmpkey,(void*)value, NULL); + type_keyval_id++; + xbt_free(tmpkey); + return MPI_SUCCESS; +} + +int smpi_type_keyval_free(int* keyval){ + char* tmpkey=xbt_malloc(INTSIZEDCHAR); + sprintf(tmpkey, "%d", *keyval); + smpi_type_key_elem elem = xbt_dict_get_or_null(smpi_type_keyvals, (const char*)tmpkey); + if(!elem){ + xbt_free(tmpkey); + return MPI_ERR_ARG; + } + xbt_dict_remove(smpi_type_keyvals, (const char*)tmpkey); + xbt_free(elem); + xbt_free(tmpkey); + return MPI_SUCCESS; +} + +int smpi_mpi_pack(void* inbuf, int incount, MPI_Datatype type, void* outbuf, int outcount, int* position, MPI_Comm comm){ + size_t size = smpi_datatype_size(type); + if (outcount - *position < incount*size) + return MPI_ERR_BUFFER; + smpi_datatype_copy(inbuf, incount, type, + (char*)outbuf + *position, outcount, MPI_CHAR); + *position += incount * size; + return MPI_SUCCESS; +} + +int smpi_mpi_unpack(void* inbuf, int insize, int* position, void* outbuf, int outcount, MPI_Datatype type, MPI_Comm comm){ + size_t size = smpi_datatype_size(type); + if (outcount*size> insize) + return MPI_ERR_BUFFER; + smpi_datatype_copy((char*)inbuf + *position, insize, MPI_CHAR, + outbuf, outcount, type); + *position += outcount * size; + return MPI_SUCCESS; +} +