X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/ad5734ab76c2c430832004e0e3af55a0378e1c5d..d504ff630c68bc8bf3e42a22ddcff4ebc62dbe51:/src/smpi/smpi_mpi_dt.c diff --git a/src/smpi/smpi_mpi_dt.c b/src/smpi/smpi_mpi_dt.c index b3320611dd..6041b85688 100644 --- a/src/smpi/smpi_mpi_dt.c +++ b/src/smpi/smpi_mpi_dt.c @@ -28,12 +28,30 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_mpi_dt, smpi, }; \ MPI_Datatype name = &mpi_##name; +#define CREATE_MPI_DATATYPE_NULL(name) \ + static s_smpi_mpi_datatype_t mpi_##name = { \ + 0, /* size */ \ + 0, /*was 1 has_subtype*/ \ + 0, /* lb */ \ + 0, /* ub = lb + size */ \ + DT_FLAG_BASIC, /* flags */ \ + NULL /* pointer on extended struct*/ \ + }; \ +MPI_Datatype name = &mpi_##name; //The following are datatypes for the MPI functions MPI_MAXLOC and MPI_MINLOC. typedef struct { float value; int index; } float_int; +typedef struct { + float value; + float index; +} float_float; +typedef struct { + double value; + double index; +} double_double; typedef struct { long value; int index; @@ -91,8 +109,14 @@ CREATE_MPI_DATATYPE(MPI_LONG_INT, long_int); CREATE_MPI_DATATYPE(MPI_DOUBLE_INT, double_int); CREATE_MPI_DATATYPE(MPI_SHORT_INT, short_int); CREATE_MPI_DATATYPE(MPI_2INT, int_int); +CREATE_MPI_DATATYPE(MPI_2FLOAT, float_float); +CREATE_MPI_DATATYPE(MPI_2DOUBLE, double_double); + CREATE_MPI_DATATYPE(MPI_LONG_DOUBLE_INT, long_double_int); +CREATE_MPI_DATATYPE_NULL(MPI_UB); +CREATE_MPI_DATATYPE_NULL(MPI_LB); +CREATE_MPI_DATATYPE_NULL(MPI_PACKED); // Internal use only CREATE_MPI_DATATYPE(MPI_PTR, void*); @@ -117,27 +141,22 @@ MPI_Aint smpi_datatype_ub(MPI_Datatype datatype) int smpi_datatype_extent(MPI_Datatype datatype, MPI_Aint * lb, MPI_Aint * extent) { - int retval; + *lb = datatype->lb; + *extent = datatype->ub - datatype->lb; + return MPI_SUCCESS; +} - if ((datatype->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { - *lb = datatype->lb; - *extent = datatype->ub - datatype->lb; - retval = MPI_SUCCESS; - } - return retval; +MPI_Aint smpi_datatype_get_extent(MPI_Datatype datatype){ + return datatype->ub - datatype->lb; } int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype) { - int retval, count; + int count; /* First check if we really have something to do */ - if (recvcount == 0) { - retval = sendcount == 0 ? MPI_SUCCESS : MPI_ERR_TRUNCATE; - } else { + if (recvcount > 0 && recvbuf != sendbuf) { /* FIXME: treat packed cases */ sendcount *= smpi_datatype_size(sendtype); recvcount *= smpi_datatype_size(recvtype); @@ -158,20 +177,18 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, }else{ s_smpi_subtype_t *subtype = sendtype->substruct; - s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)sendtype; - void * buf_tmp = malloc(count * type_c->size_oldtype); + void * buf_tmp = xbt_malloc(count); subtype->serialize( sendbuf, buf_tmp,1, subtype); subtype = recvtype->substruct; - subtype->unserialize(recvbuf, buf_tmp,1, subtype); + subtype->unserialize( buf_tmp, recvbuf,1, subtype); free(buf_tmp); } - retval = sendcount > recvcount ? MPI_ERR_TRUNCATE : MPI_SUCCESS; } - return retval; + return sendcount > recvcount ? MPI_ERR_TRUNCATE : MPI_SUCCESS; } /* @@ -194,11 +211,17 @@ void serialize_vector( const void *noncontiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_vector_char, - noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_vector_char, + noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_vector_char, + contiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; - noncontiguous_vector_char += type_c->block_stride*type_c->size_oldtype; + noncontiguous_vector_char += type_c->block_stride*smpi_datatype_get_extent(type_c->old_type); } } @@ -223,17 +246,22 @@ void unserialize_vector( const void *contiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_vector_char, - contiguous_vector_char, type_c->block_length * type_c->size_oldtype); - + if (type_c->old_type->has_subtype == 0) + memcpy(noncontiguous_vector_char, + contiguous_vector_char, type_c->block_length * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, + noncontiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; - noncontiguous_vector_char += type_c->block_stride*type_c->size_oldtype; + noncontiguous_vector_char += type_c->block_stride*smpi_datatype_get_extent(type_c->old_type); } } /* * Create a Sub type vector to be able to serialize and unserialize it - * the structre s_smpi_mpi_vector_t is derived from s_smpi_subtype which + * the structure s_smpi_mpi_vector_t is derived from s_smpi_subtype which * required the functions unserialize and serialize * */ @@ -245,6 +273,7 @@ s_smpi_mpi_vector_t* smpi_datatype_vector_create( int block_stride, s_smpi_mpi_vector_t *new_t= xbt_new(s_smpi_mpi_vector_t,1); new_t->base.serialize = &serialize_vector; new_t->base.unserialize = &unserialize_vector; + new_t->base.subtype_free = &free_vector; new_t->block_stride = block_stride; new_t->block_length = block_length; new_t->block_count = block_count; @@ -253,32 +282,59 @@ s_smpi_mpi_vector_t* smpi_datatype_vector_create( int block_stride, return new_t; } -void smpi_datatype_create(MPI_Datatype* new_type, int size, int has_subtype, +void smpi_datatype_create(MPI_Datatype* new_type, int size,int lb, int ub, int has_subtype, void *struct_type, int flags){ MPI_Datatype new_t= xbt_new(s_smpi_mpi_datatype_t,1); new_t->size = size; new_t->has_subtype = has_subtype; - new_t->lb = 0; - new_t->ub = size; + new_t->lb = lb; + new_t->ub = ub; new_t->flags = flags; new_t->substruct = struct_type; + new_t->in_use=0; *new_type = new_t; } void smpi_datatype_free(MPI_Datatype* type){ + + if((*type)->flags & DT_FLAG_PREDEFINED)return; + + //if still used, mark for deletion + if((*type)->in_use!=0){ + (*type)->flags |=DT_FLAG_DESTROYED; + return; + } + + if ((*type)->has_subtype == 1){ + ((s_smpi_subtype_t *)(*type)->substruct)->subtype_free(type); + xbt_free((*type)->substruct); + } xbt_free(*type); + +} + +void smpi_datatype_use(MPI_Datatype type){ + if(type)type->in_use++; +} + + +void smpi_datatype_unuse(MPI_Datatype type){ + if(type && type->in_use-- == 0 && (type->flags & DT_FLAG_DESTROYED)) + smpi_datatype_free(&type); } int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_type) { int retval; - if ((old_type->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { - smpi_datatype_create(new_type, count * - smpi_datatype_size(old_type),1,NULL, DT_FLAG_CONTIGUOUS); - retval=MPI_SUCCESS; + if(old_type->has_subtype){ + //handle this case as a hvector with stride equals to the extent of the datatype + return smpi_datatype_hvector(count, 1, smpi_datatype_get_extent(old_type), old_type, new_type); } + smpi_datatype_create(new_type, + count * smpi_datatype_size(old_type), + 0,count * smpi_datatype_size(old_type), + 0,NULL, DT_FLAG_CONTIGUOUS); + retval=MPI_SUCCESS; return retval; } @@ -286,37 +342,43 @@ int smpi_datatype_vector(int count, int blocklen, int stride, MPI_Datatype old_t { int retval; if (blocklen<=0) return MPI_ERR_ARG; - if ((old_type->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { - if(stride != blocklen){ - s_smpi_mpi_vector_t* subtype = smpi_datatype_vector_create( stride, - blocklen, - count, - old_type, - smpi_datatype_size(old_type)); - - smpi_datatype_create(new_type, count * (blocklen) * - smpi_datatype_size(old_type), - 1, - subtype, - DT_FLAG_VECTOR); - retval=MPI_SUCCESS; - }else{ - /* in this situation the data are contignous thus it's not - * required to serialize and unserialize it*/ - smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type), - 0, - NULL, - DT_FLAG_VECTOR); - retval=MPI_SUCCESS; - } + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=smpi_datatype_lb(old_type); + ub=((count-1)*stride+blocklen-1)*smpi_datatype_get_extent(old_type)+smpi_datatype_ub(old_type); + } + if(old_type->has_subtype || stride != blocklen){ + + + s_smpi_mpi_vector_t* subtype = smpi_datatype_vector_create( stride, + blocklen, + count, + old_type, + smpi_datatype_size(old_type)); + smpi_datatype_create(new_type, + count * (blocklen) * smpi_datatype_size(old_type), lb, + ub, + 1, + subtype, + DT_FLAG_VECTOR); + retval=MPI_SUCCESS; + }else{ + /* in this situation the data are contignous thus it's not + * required to serialize and unserialize it*/ + smpi_datatype_create(new_type, count * blocklen * + smpi_datatype_size(old_type), 0, ((count -1) * stride + blocklen)* + smpi_datatype_size(old_type), + 0, + NULL, + DT_FLAG_VECTOR|DT_FLAG_CONTIGUOUS); + retval=MPI_SUCCESS; } return retval; } - +void free_vector(MPI_Datatype* d){ +} /* Hvector Implementation - Vector with stride in bytes @@ -343,8 +405,14 @@ void serialize_hvector( const void *noncontiguous_hvector, char* noncontiguous_vector_char = (char*)noncontiguous_hvector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_vector_char, + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_vector_char, noncontiguous_vector_char, type_c->block_length * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_vector_char, + contiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; noncontiguous_vector_char += type_c->block_stride; @@ -371,9 +439,14 @@ void unserialize_hvector( const void *contiguous_vector, char* noncontiguous_vector_char = (char*)noncontiguous_vector; for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_vector_char, + if (type_c->old_type->has_subtype == 0) + memcpy(noncontiguous_vector_char, contiguous_vector_char, type_c->block_length * type_c->size_oldtype); - + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, + noncontiguous_vector_char, + type_c->block_length, + type_c->old_type->substruct); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; noncontiguous_vector_char += type_c->block_stride; } @@ -381,7 +454,7 @@ void unserialize_hvector( const void *contiguous_vector, /* * Create a Sub type vector to be able to serialize and unserialize it - * the structre s_smpi_mpi_vector_t is derived from s_smpi_subtype which + * the structure s_smpi_mpi_vector_t is derived from s_smpi_subtype which * required the functions unserialize and serialize * */ @@ -393,6 +466,7 @@ s_smpi_mpi_hvector_t* smpi_datatype_hvector_create( MPI_Aint block_stride, s_smpi_mpi_hvector_t *new_t= xbt_new(s_smpi_mpi_hvector_t,1); new_t->base.serialize = &serialize_hvector; new_t->base.unserialize = &unserialize_hvector; + new_t->base.subtype_free = &free_hvector; new_t->block_stride = block_stride; new_t->block_length = block_length; new_t->block_count = block_count; @@ -401,34 +475,41 @@ s_smpi_mpi_hvector_t* smpi_datatype_hvector_create( MPI_Aint block_stride, return new_t; } +//do nothing for vector types +void free_hvector(MPI_Datatype* d){ +} + int smpi_datatype_hvector(int count, int blocklen, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype* new_type) { int retval; if (blocklen<=0) return MPI_ERR_ARG; - if ((old_type->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { - if(stride != blocklen*smpi_datatype_size(old_type)){ - s_smpi_mpi_hvector_t* subtype = smpi_datatype_hvector_create( stride, - blocklen, - count, - old_type, - smpi_datatype_size(old_type)); - - smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type), - 1, - subtype, - DT_FLAG_VECTOR); - retval=MPI_SUCCESS; - }else{ - smpi_datatype_create(new_type, count * blocklen * - smpi_datatype_size(old_type), - 0, - NULL, - DT_FLAG_VECTOR); - retval=MPI_SUCCESS; - } + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=smpi_datatype_lb(old_type); + ub=((count-1)*stride)+(blocklen-1)*smpi_datatype_get_extent(old_type)+smpi_datatype_ub(old_type); + } + if(old_type->has_subtype || stride != blocklen*smpi_datatype_get_extent(old_type)){ + s_smpi_mpi_hvector_t* subtype = smpi_datatype_hvector_create( stride, + blocklen, + count, + old_type, + smpi_datatype_size(old_type)); + + smpi_datatype_create(new_type, count * blocklen * smpi_datatype_size(old_type), + lb,ub, + 1, + subtype, + DT_FLAG_VECTOR); + retval=MPI_SUCCESS; + }else{ + smpi_datatype_create(new_type, count * blocklen * + smpi_datatype_size(old_type),0,count * blocklen * + smpi_datatype_size(old_type), + 0, + NULL, + DT_FLAG_VECTOR|DT_FLAG_CONTIGUOUS); + retval=MPI_SUCCESS; } return retval; } @@ -443,8 +524,8 @@ Indexed Implementation * @param contiguous_indexed - output indexed * @param noncontiguous_indexed - input indexed * @param type - pointer contening : - * - stride - stride of between noncontiguous data - * - block_length - the width or height of blocked matrix + * - block_lengths - the width or height of blocked matrix + * - block_indices - indices of each data, in element * - count - the number of rows of matrix */ void serialize_indexed( const void *noncontiguous_indexed, @@ -453,16 +534,26 @@ void serialize_indexed( const void *noncontiguous_indexed, void *type) { s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; - int i; + int i,j; char* contiguous_indexed_char = (char*)contiguous_indexed; char* noncontiguous_indexed_char = (char*)noncontiguous_indexed; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_indexed_char, - noncontiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); - - contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*type_c->size_oldtype; + for(j=0; jblock_count; i++) { + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_indexed_char, + noncontiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_indexed_char, + contiguous_indexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); + + + contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1)noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*smpi_datatype_get_extent(type_c->old_type); + else noncontiguous_indexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); + } + noncontiguous_indexed=(void*)noncontiguous_indexed_char; } } /* @@ -470,8 +561,8 @@ void serialize_indexed( const void *noncontiguous_indexed, * @param noncontiguous_indexed - output indexed * @param contiguous_indexed - input indexed * @param type - pointer contening : - * - stride - stride of between noncontiguous data - * - block_length - the width or height of blocked matrix + * - block_lengths - the width or height of blocked matrix + * - block_indices - indices of each data, in element * - count - the number of rows of matrix */ void unserialize_indexed( const void *contiguous_indexed, @@ -480,23 +571,38 @@ void unserialize_indexed( const void *contiguous_indexed, void *type) { s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; - int i; + int i,j; char* contiguous_indexed_char = (char*)contiguous_indexed; char* noncontiguous_indexed_char = (char*)noncontiguous_indexed; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_indexed_char, - contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); - - contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*type_c->size_oldtype; + for(j=0; jblock_count; i++) { + if (type_c->old_type->has_subtype == 0) + memcpy(noncontiguous_indexed_char, + contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char, + noncontiguous_indexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); + + contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1) + noncontiguous_indexed_char = (char*)noncontiguous_indexed + type_c->block_indices[i+1]*smpi_datatype_get_extent(type_c->old_type); + else noncontiguous_indexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); + } + noncontiguous_indexed=(void*)noncontiguous_indexed_char; } } +void free_indexed(MPI_Datatype* type){ + xbt_free(((s_smpi_mpi_indexed_t *)(*type)->substruct)->block_lengths); + xbt_free(((s_smpi_mpi_indexed_t *)(*type)->substruct)->block_indices); +} + /* * Create a Sub type indexed to be able to serialize and unserialize it - * the structre s_smpi_mpi_indexed_t is derived from s_smpi_subtype which + * the structure s_smpi_mpi_indexed_t is derived from s_smpi_subtype which * required the functions unserialize and serialize */ s_smpi_mpi_indexed_t* smpi_datatype_indexed_create( int* block_lengths, @@ -507,9 +613,15 @@ s_smpi_mpi_indexed_t* smpi_datatype_indexed_create( int* block_lengths, s_smpi_mpi_indexed_t *new_t= xbt_new(s_smpi_mpi_indexed_t,1); new_t->base.serialize = &serialize_indexed; new_t->base.unserialize = &unserialize_indexed; - //FIXME : copy those or assume they won't be freed ? - new_t->block_lengths = block_lengths; - new_t->block_indices = block_indices; + new_t->base.subtype_free = &free_indexed; + //TODO : add a custom function for each time to clean these + new_t->block_lengths= xbt_new(int, block_count); + new_t->block_indices= xbt_new(int, block_count); + int i; + for(i=0;iblock_lengths[i]=block_lengths[i]; + new_t->block_indices[i]=block_indices[i]; + } new_t->block_count = block_count; new_t->old_type = old_type; new_t->size_oldtype = size_oldtype; @@ -522,24 +634,43 @@ int smpi_datatype_indexed(int count, int* blocklens, int* indices, MPI_Datatype int i; int retval; int size = 0; + int contiguous=1; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0]*smpi_datatype_get_extent(old_type); + ub=indices[0]*smpi_datatype_get_extent(old_type) + blocklens[0]*smpi_datatype_ub(old_type); + } + for(i=0; i< count; i++){ if (blocklens[i]<=0) return MPI_ERR_ARG; size += blocklens[i]; + + if(indices[i]*smpi_datatype_get_extent(old_type)+smpi_datatype_lb(old_type)ub) + ub = indices[i]*smpi_datatype_get_extent(old_type)+blocklens[i]*smpi_datatype_ub(old_type); + + if ( (i< count -1) && (indices[i]+blocklens[i] != indices[i+1]) )contiguous=0; } - if ((old_type->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { + if (old_type->has_subtype == 1) + contiguous=0; + + if(!contiguous){ s_smpi_mpi_indexed_t* subtype = smpi_datatype_indexed_create( blocklens, indices, count, old_type, smpi_datatype_size(old_type)); - + smpi_datatype_create(new_type, size * + smpi_datatype_size(old_type),lb,ub,1, subtype, DT_FLAG_DATA); + }else{ smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),1, subtype, DT_FLAG_DATA); - retval=MPI_SUCCESS; + smpi_datatype_size(old_type),0,size * + smpi_datatype_size(old_type),0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); } + retval=MPI_SUCCESS; return retval; } @@ -553,8 +684,8 @@ Hindexed Implementation - Indexed with indices in bytes * @param contiguous_hindexed - output hindexed * @param noncontiguous_hindexed - input hindexed * @param type - pointer contening : - * - stride - stride of between noncontiguous data - * - block_length - the width or height of blocked matrix + * - block_lengths - the width or height of blocked matrix + * - block_indices - indices of each data, in bytes * - count - the number of rows of matrix */ void serialize_hindexed( const void *noncontiguous_hindexed, @@ -563,16 +694,25 @@ void serialize_hindexed( const void *noncontiguous_hindexed, void *type) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; - int i; + int i,j; char* contiguous_hindexed_char = (char*)contiguous_hindexed; char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_hindexed_char, - noncontiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); - - contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; + for(j=0; jblock_count; i++) { + if (type_c->old_type->has_subtype == 0) + memcpy(contiguous_hindexed_char, + noncontiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->serialize( noncontiguous_hindexed_char, + contiguous_hindexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); + + contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; + else noncontiguous_hindexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); + } + noncontiguous_hindexed=(void*)noncontiguous_hindexed_char; } } /* @@ -580,8 +720,8 @@ void serialize_hindexed( const void *noncontiguous_hindexed, * @param noncontiguous_hindexed - output hindexed * @param contiguous_hindexed - input hindexed * @param type - pointer contening : - * - stride - stride of between noncontiguous data - * - block_length - the width or height of blocked matrix + * - block_lengths - the width or height of blocked matrix + * - block_indices - indices of each data, in bytes * - count - the number of rows of matrix */ void unserialize_hindexed( const void *contiguous_hindexed, @@ -590,23 +730,37 @@ void unserialize_hindexed( const void *contiguous_hindexed, void *type) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; - int i; + int i,j; char* contiguous_hindexed_char = (char*)contiguous_hindexed; char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_hindexed_char, - contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); - - contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; - noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; + for(j=0; jblock_count; i++) { + if (type_c->old_type->has_subtype == 0) + memcpy(noncontiguous_hindexed_char, + contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + else + ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_hindexed_char, + noncontiguous_hindexed_char, + type_c->block_lengths[i], + type_c->old_type->substruct); + + contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; + if (iblock_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; + else noncontiguous_hindexed_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_type); + } + noncontiguous_hindexed=(void*)noncontiguous_hindexed_char; } } +void free_hindexed(MPI_Datatype* type){ + xbt_free(((s_smpi_mpi_hindexed_t *)(*type)->substruct)->block_lengths); + xbt_free(((s_smpi_mpi_hindexed_t *)(*type)->substruct)->block_indices); +} + /* * Create a Sub type hindexed to be able to serialize and unserialize it - * the structre s_smpi_mpi_hindexed_t is derived from s_smpi_subtype which + * the structure s_smpi_mpi_hindexed_t is derived from s_smpi_subtype which * required the functions unserialize and serialize */ s_smpi_mpi_hindexed_t* smpi_datatype_hindexed_create( int* block_lengths, @@ -617,9 +771,15 @@ s_smpi_mpi_hindexed_t* smpi_datatype_hindexed_create( int* block_lengths, s_smpi_mpi_hindexed_t *new_t= xbt_new(s_smpi_mpi_hindexed_t,1); new_t->base.serialize = &serialize_hindexed; new_t->base.unserialize = &unserialize_hindexed; - //FIXME : copy those or assume they won't be freed ? - new_t->block_lengths = block_lengths; - new_t->block_indices = block_indices; + new_t->base.subtype_free = &free_hindexed; + //TODO : add a custom function for each time to clean these + new_t->block_lengths= xbt_new(int, block_count); + new_t->block_indices= xbt_new(MPI_Aint, block_count); + int i; + for(i=0;iblock_lengths[i]=block_lengths[i]; + new_t->block_indices[i]=block_indices[i]; + } new_t->block_count = block_count; new_t->old_type = old_type; new_t->size_oldtype = size_oldtype; @@ -632,24 +792,42 @@ int smpi_datatype_hindexed(int count, int* blocklens, MPI_Aint* indices, MPI_Dat int i; int retval; int size = 0; + int contiguous=1; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0] + smpi_datatype_lb(old_type); + ub=indices[0] + blocklens[0]*smpi_datatype_ub(old_type); + } for(i=0; i< count; i++){ if (blocklens[i]<=0) return MPI_ERR_ARG; size += blocklens[i]; + + if(indices[i]+smpi_datatype_lb(old_type)ub) ub = indices[i]+blocklens[i]*smpi_datatype_ub(old_type); + + if ( (i< count -1) && (indices[i]+blocklens[i]*smpi_datatype_size(old_type) != indices[i+1]) )contiguous=0; } - if ((old_type->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) { - retval = MPI_ERR_TYPE; - } else { + if (old_type->has_subtype == 1 || lb!=0) + contiguous=0; + + if(!contiguous){ s_smpi_mpi_hindexed_t* subtype = smpi_datatype_hindexed_create( blocklens, indices, count, old_type, smpi_datatype_size(old_type)); - - smpi_datatype_create(new_type, size * - smpi_datatype_size(old_type),1, subtype, DT_FLAG_DATA); - retval=MPI_SUCCESS; + smpi_datatype_create(new_type, size * smpi_datatype_size(old_type), + lb, + ub + ,1, subtype, DT_FLAG_DATA); + }else{ + smpi_datatype_create(new_type, size * smpi_datatype_size(old_type), + 0,size * smpi_datatype_size(old_type), + 0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); } + retval=MPI_SUCCESS; return retval; } @@ -673,15 +851,26 @@ void serialize_struct( const void *noncontiguous_struct, void *type) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; - int i; + int i,j; char* contiguous_struct_char = (char*)contiguous_struct; char* noncontiguous_struct_char = (char*)noncontiguous_struct; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(contiguous_struct_char, - noncontiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); - contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); - noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; + for(j=0; jblock_count; i++) { + if (type_c->old_types[i]->has_subtype == 0) + memcpy(contiguous_struct_char, + noncontiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); + else + ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->serialize( noncontiguous_struct_char, + contiguous_struct_char, + type_c->block_lengths[i], + type_c->old_types[i]->substruct); + + + contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); + if (iblock_count-1)noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; + else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_types[i]);//let's hope this is MPI_UB ? + } + noncontiguous_struct=(void*)noncontiguous_struct_char; } } /* @@ -699,22 +888,39 @@ void unserialize_struct( const void *contiguous_struct, void *type) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; - int i; + int i,j; char* contiguous_struct_char = (char*)contiguous_struct; char* noncontiguous_struct_char = (char*)noncontiguous_struct; - - for (i = 0; i < type_c->block_count * count; i++) { - memcpy(noncontiguous_struct_char, - contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); - contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); - noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; + for(j=0; jblock_count; i++) { + if (type_c->old_types[i]->has_subtype == 0) + memcpy(noncontiguous_struct_char, + contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); + else + ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char, + noncontiguous_struct_char, + type_c->block_lengths[i], + type_c->old_types[i]->substruct); + + contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); + if (iblock_count-1)noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; + else noncontiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_get_extent(type_c->old_types[i]); + } + noncontiguous_struct=(void*)noncontiguous_struct_char; + } } +void free_struct(MPI_Datatype* type){ + xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->block_lengths); + xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->block_indices); + xbt_free(((s_smpi_mpi_struct_t *)(*type)->substruct)->old_types); +} + /* * Create a Sub type struct to be able to serialize and unserialize it - * the structre s_smpi_mpi_struct_t is derived from s_smpi_subtype which + * the structure s_smpi_mpi_struct_t is derived from s_smpi_subtype which * required the functions unserialize and serialize */ s_smpi_mpi_struct_t* smpi_datatype_struct_create( int* block_lengths, @@ -724,11 +930,21 @@ s_smpi_mpi_struct_t* smpi_datatype_struct_create( int* block_lengths, s_smpi_mpi_struct_t *new_t= xbt_new(s_smpi_mpi_struct_t,1); new_t->base.serialize = &serialize_struct; new_t->base.unserialize = &unserialize_struct; - //FIXME : copy those or assume they won't be freed ? - new_t->block_lengths = block_lengths; - new_t->block_indices = block_indices; + new_t->base.subtype_free = &free_struct; + //TODO : add a custom function for each time to clean these + new_t->block_lengths= xbt_new(int, block_count); + new_t->block_indices= xbt_new(MPI_Aint, block_count); + new_t->old_types= xbt_new(MPI_Datatype, block_count); + int i; + for(i=0;iblock_lengths[i]=block_lengths[i]; + new_t->block_indices[i]=block_indices[i]; + new_t->old_types[i]=old_types[i]; + } + //new_t->block_lengths = block_lengths; + //new_t->block_indices = block_indices; new_t->block_count = block_count; - new_t->old_types = old_types; + //new_t->old_types = old_types; return new_t; } @@ -737,22 +953,48 @@ int smpi_datatype_struct(int count, int* blocklens, MPI_Aint* indices, MPI_Datat { int i; size_t size = 0; + int contiguous=1; + size = 0; + MPI_Aint lb = 0; + MPI_Aint ub = 0; + if(count>0){ + lb=indices[0] + smpi_datatype_lb(old_types[0]); + ub=indices[0] + blocklens[0]*smpi_datatype_ub(old_types[0]); + } + int forced_lb=0; + int forced_ub=0; for(i=0; i< count; i++){ if (blocklens[i]<=0) return MPI_ERR_ARG; - if ((old_types[i]->flags & DT_FLAG_COMMITED) != DT_FLAG_COMMITED) - return MPI_ERR_TYPE; + if (old_types[i]->has_subtype == 1) + contiguous=0; + size += blocklens[i]*smpi_datatype_size(old_types[i]); - } + if (old_types[i]==MPI_LB){ + lb=indices[i]; + forced_lb=1; + } + if (old_types[i]==MPI_UB){ + ub=indices[i]; + forced_ub=1; + } + if(!forced_lb && indices[i]+smpi_datatype_lb(old_types[i])ub) ub = indices[i]+blocklens[i]*smpi_datatype_ub(old_types[i]); - s_smpi_mpi_struct_t* subtype = smpi_datatype_struct_create( blocklens, + if ( (i< count -1) && (indices[i]+blocklens[i]*smpi_datatype_size(old_types[i]) != indices[i+1]) )contiguous=0; + } + + if(!contiguous){ + s_smpi_mpi_struct_t* subtype = smpi_datatype_struct_create( blocklens, indices, count, old_types); - smpi_datatype_create(new_type, size ,1, subtype, DT_FLAG_DATA); - + smpi_datatype_create(new_type, size, lb, ub,1, subtype, DT_FLAG_DATA); + }else{ + smpi_datatype_create(new_type, size, lb, ub,0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS); + } return MPI_SUCCESS; } @@ -763,6 +1005,7 @@ void smpi_datatype_commit(MPI_Datatype *datatype) typedef struct s_smpi_mpi_op { MPI_User_function *func; + int is_commute; } s_smpi_mpi_op_t; #define MAX_OP(a, b) (b) = (a) < (b) ? (b) : (a) @@ -1053,6 +1296,10 @@ static void minloc_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, int_int, MINLOC_OP); } else if (*datatype == MPI_LONG_DOUBLE_INT) { APPLY_FUNC(a, b, length, long_double_int, MINLOC_OP); + } else if (*datatype == MPI_2FLOAT) { + APPLY_FUNC(a, b, length, float_float, MINLOC_OP); + } else if (*datatype == MPI_2DOUBLE) { + APPLY_FUNC(a, b, length, double_double, MINLOC_OP); } } @@ -1071,12 +1318,16 @@ static void maxloc_func(void *a, void *b, int *length, APPLY_FUNC(a, b, length, int_int, MAXLOC_OP); } else if (*datatype == MPI_LONG_DOUBLE_INT) { APPLY_FUNC(a, b, length, long_double_int, MAXLOC_OP); + } else if (*datatype == MPI_2FLOAT) { + APPLY_FUNC(a, b, length, float_float, MAXLOC_OP); + } else if (*datatype == MPI_2DOUBLE) { + APPLY_FUNC(a, b, length, double_double, MAXLOC_OP); } } #define CREATE_MPI_OP(name, func) \ - static s_smpi_mpi_op_t mpi_##name = { &(func) /* func */ }; \ + static s_smpi_mpi_op_t mpi_##name = { &(func) /* func */, TRUE }; \ MPI_Op name = &mpi_##name; CREATE_MPI_OP(MPI_MAX, max_func); @@ -1095,13 +1346,17 @@ CREATE_MPI_OP(MPI_MINLOC, minloc_func); MPI_Op smpi_op_new(MPI_User_function * function, int commute) { MPI_Op op; - - //FIXME: add commute param op = xbt_new(s_smpi_mpi_op_t, 1); op->func = function; + op-> is_commute = commute; return op; } +int smpi_op_is_commute(MPI_Op op) +{ + return op-> is_commute; +} + void smpi_op_destroy(MPI_Op op) { xbt_free(op);