From 7a453812cf372e6b8ed09c68e521f69cf62487a0 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Wed, 17 Apr 2019 22:38:04 +0200 Subject: [PATCH] MPI_File_read_shared, MPI_File_write_shared, MPI_File_read_ordered, MPI_File_write_ordered, MPI_File_seek_shared, MPI_File_get_position_shared --- src/smpi/bindings/smpi_mpi.cpp | 12 +-- src/smpi/bindings/smpi_pmpi_file.cpp | 95 ++++++++++++++++++ src/smpi/include/smpi_file.hpp | 10 ++ src/smpi/mpi/smpi_file.cpp | 144 ++++++++++++++++++++++++++- 4 files changed, 252 insertions(+), 9 deletions(-) diff --git a/src/smpi/bindings/smpi_mpi.cpp b/src/smpi/bindings/smpi_mpi.cpp index d1fb99864c..c11b325dc6 100644 --- a/src/smpi/bindings/smpi_mpi.cpp +++ b/src/smpi/bindings/smpi_mpi.cpp @@ -377,14 +377,14 @@ UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_iwrite_all,(MPI_File fh, void *buf WRAPPED_PMPI_CALL(int, MPI_File_seek,(MPI_File fh, MPI_Offset offset, int whenace), (fh, offset, whenace)) WRAPPED_PMPI_CALL(int, MPI_File_get_position,(MPI_File fh, MPI_Offset *offset), (fh, offset)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_get_byte_offset,(MPI_File fh, MPI_Offset offset, MPI_Offset *disp), (fh, offset, disp)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_read_shared,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_write_shared,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) +WRAPPED_PMPI_CALL(int, MPI_File_read_shared,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) +WRAPPED_PMPI_CALL(int, MPI_File_write_shared,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_iread_shared,(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Request *request), (fh, buf, count, datatype, request)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_iwrite_shared,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request), (fh, buf, count, datatype, request)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_read_ordered,(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_write_ordered,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_seek_shared,(MPI_File fh, MPI_Offset offset, int whence), (fh, offset, whence)) -UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_get_position_shared,(MPI_File fh, MPI_Offset *offset), (fh, offset)) +WRAPPED_PMPI_CALL(int, MPI_File_read_ordered,(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) +WRAPPED_PMPI_CALL(int, MPI_File_write_ordered,(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status), (fh, buf, count, datatype, status)) +WRAPPED_PMPI_CALL(int, MPI_File_seek_shared,(MPI_File fh, MPI_Offset offset, int whence), (fh, offset, whence)) +WRAPPED_PMPI_CALL(int, MPI_File_get_position_shared,(MPI_File fh, MPI_Offset *offset), (fh, offset)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_read_at_all_begin,(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype), (fh, offset, buf, count, datatype)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_read_at_all_end,(MPI_File fh, void *buf, MPI_Status *status), (fh, buf, status)) UNIMPLEMENTED_WRAPPED_PMPI_CALL(int, MPI_File_write_at_all_begin,(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype), (fh, offset, buf, count, datatype)) diff --git a/src/smpi/bindings/smpi_pmpi_file.cpp b/src/smpi/bindings/smpi_pmpi_file.cpp index 1317eb7e30..ce31d25297 100644 --- a/src/smpi/bindings/smpi_pmpi_file.cpp +++ b/src/smpi/bindings/smpi_pmpi_file.cpp @@ -61,6 +61,15 @@ int PMPI_File_seek(MPI_File fh, MPI_Offset offset, int whence){ } } +int PMPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence){ + CHECK_FILE(fh); + else { + smpi_bench_end(); + int ret = fh->seek_shared(offset,whence); + smpi_bench_begin(); + return ret; + } +} int PMPI_File_get_position(MPI_File fh, MPI_Offset* offset){ CHECK_FILE(fh); @@ -74,6 +83,18 @@ int PMPI_File_get_position(MPI_File fh, MPI_Offset* offset){ } } +int PMPI_File_get_position_shared(MPI_File fh, MPI_Offset* offset){ + CHECK_FILE(fh); + else if (offset==nullptr) + return MPI_ERR_DISP; + else { + smpi_bench_end(); + int ret = fh->get_position_shared(offset); + smpi_bench_begin(); + return ret; + } +} + int PMPI_File_read(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ CHECK_FILE(fh); CHECK_BUFFER(buf, count); @@ -93,6 +114,25 @@ int PMPI_File_read(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_ } } +int PMPI_File_read_shared(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ + CHECK_FILE(fh); + CHECK_BUFFER(buf, count); + CHECK_COUNT(count); + PASS_ZEROCOUNT(count); + CHECK_DATATYPE(datatype, count); + CHECK_STATUS(status); + CHECK_FLAGS(fh); + else { + smpi_bench_end(); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - read_shared", static_cast(count*datatype->size()))); + int ret = simgrid::smpi::File::read_shared(fh, buf, count, datatype, status); + TRACE_smpi_comm_out(rank_traced); + smpi_bench_begin(); + return ret; + } +} + int PMPI_File_write(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ CHECK_FILE(fh); CHECK_BUFFER(buf, count); @@ -112,6 +152,25 @@ int PMPI_File_write(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI } } +int PMPI_File_write_shared(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ + CHECK_FILE(fh); + CHECK_BUFFER(buf, count); + CHECK_COUNT(count); + PASS_ZEROCOUNT(count); + CHECK_DATATYPE(datatype, count); + CHECK_STATUS(status); + CHECK_FLAGS(fh); + else { + smpi_bench_end(); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - write_shared", static_cast(count*datatype->size()))); + int ret = simgrid::smpi::File::write_shared(fh, buf, count, datatype, status); + TRACE_smpi_comm_out(rank_traced); + smpi_bench_begin(); + return ret; + } +} + int PMPI_File_read_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ CHECK_FILE(fh); CHECK_BUFFER(buf, count); @@ -130,6 +189,24 @@ int PMPI_File_read_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype, } } +int PMPI_File_read_ordered(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ + CHECK_FILE(fh); + CHECK_BUFFER(buf, count); + CHECK_COUNT(count); + CHECK_DATATYPE(datatype, count); + CHECK_STATUS(status); + CHECK_FLAGS(fh); + else { + smpi_bench_end(); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - read_ordered", static_cast(count*datatype->size()))); + int ret = simgrid::smpi::File::read_ordered(fh, buf, count, datatype, status); + TRACE_smpi_comm_out(rank_traced); + smpi_bench_begin(); + return ret; + } +} + int PMPI_File_write_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ CHECK_FILE(fh); CHECK_BUFFER(buf, count); @@ -148,6 +225,24 @@ int PMPI_File_write_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype, } } +int PMPI_File_write_ordered(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ + CHECK_FILE(fh); + CHECK_BUFFER(buf, count); + CHECK_COUNT(count); + CHECK_DATATYPE(datatype, count); + CHECK_STATUS(status); + CHECK_FLAGS(fh); + else { + smpi_bench_end(); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - write_ordered", static_cast(count*datatype->size()))); + int ret = simgrid::smpi::File::write_ordered(fh, buf, count, datatype, status); + TRACE_smpi_comm_out(rank_traced); + smpi_bench_begin(); + return ret; + } +} + int PMPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){ CHECK_FILE(fh); CHECK_BUFFER(buf, count); diff --git a/src/smpi/include/smpi_file.hpp b/src/smpi/include/smpi_file.hpp index 7c89f8bd6c..42fd949a9d 100644 --- a/src/smpi/include/smpi_file.hpp +++ b/src/smpi/include/smpi_file.hpp @@ -22,18 +22,28 @@ class File{ simgrid::s4u::File* file_; MPI_Info info_; MPI_Offset shared_file_pointer_; + MPI_Win win_; + char* list_; public: File(MPI_Comm comm, char *filename, int amode, MPI_Info info); ~File(); int size(); int get_position(MPI_Offset* offset); + int get_position_shared(MPI_Offset* offset); int flags(); int sync(); int seek(MPI_Offset offset, int whence); + int seek_shared(MPI_Offset offset, int whence); + int lock(); + int unlock(); MPI_Info info(); void set_info( MPI_Info info); static int read(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); + static int read_shared(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); + static int read_ordered(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); static int write(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); + static int write_shared(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); + static int write_ordered(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status); template int op_all(void *buf, int count,MPI_Datatype datatype, MPI_Status *status); static int close(MPI_File *fh); static int del(char *filename, MPI_Info info); diff --git a/src/smpi/mpi/smpi_file.cpp b/src/smpi/mpi/smpi_file.cpp index 6006b4d848..3aec0f2a4e 100644 --- a/src/smpi/mpi/smpi_file.cpp +++ b/src/smpi/mpi/smpi_file.cpp @@ -8,18 +8,31 @@ #include "smpi_coll.hpp" #include "smpi_datatype.hpp" #include "smpi_info.hpp" +#include "smpi_win.hpp" #include "smpi_file.hpp" +#include "smpi_status.hpp" #include "simgrid/plugins/file_system.h" XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_io, smpi, "Logging specific to SMPI (RMA operations)"); +#define FP_SIZE sizeof(MPI_Offset) namespace simgrid{ namespace smpi{ - File::File(MPI_Comm comm, char *filename, int amode, MPI_Info info): comm_(comm), flags_(amode), info_(info){ + + File::File(MPI_Comm comm, char *filename, int amode, MPI_Info info): comm_(comm), flags_(amode), info_(info), shared_file_pointer_(0) { file_= new simgrid::s4u::File(filename, nullptr); + list_=nullptr; + if (comm_->rank() == 0) { + int size= comm_->size() + FP_SIZE; + list_ = new char[size]; + memset(list_, 0, size); + win_=new Win(list_, size, 1, MPI_INFO_NULL, comm_); + }else{ + win_=new Win(list_, 0, 1, MPI_INFO_NULL, comm_); + } } - + File::~File(){ delete file_; } @@ -45,6 +58,13 @@ namespace smpi{ return MPI_SUCCESS; } + int File::get_position_shared(MPI_Offset* offset){ + lock(); + *offset=shared_file_pointer_; + unlock(); + return MPI_SUCCESS; + } + int File::seek(MPI_Offset offset, int whence){ switch(whence){ case(MPI_SEEK_SET): @@ -64,7 +84,15 @@ namespace smpi{ } return MPI_SUCCESS; } - + + int File::seek_shared(MPI_Offset offset, int whence){ + lock(); + seek(offset,whence); + shared_file_pointer_=file_->tell(); + unlock(); + return MPI_SUCCESS; + } + int File::read(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ //get position first as we may be doing non contiguous reads and it will probably be updated badly MPI_Offset position = fh->file_->tell(); @@ -77,9 +105,39 @@ namespace smpi{ fh->file_->seek(position+movesize, SEEK_SET); } XBT_DEBUG("Position after read in MPI_File %s : %llu",fh->file_->get_path(), fh->file_->tell()); + status->count=count*datatype->size(); + return MPI_SUCCESS; + } + + int File::read_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ + fh->lock(); + fh->seek(fh->shared_file_pointer_,MPI_SEEK_SET); + read(fh, buf, count, datatype, status); + fh->shared_file_pointer_=fh->file_->tell(); + fh->unlock(); return MPI_SUCCESS; } + int File::read_ordered(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ + //0 needs to get the shared pointer value + if(fh->comm_->rank()==0){ + fh->lock(); + fh->unlock(); + }else{ + fh->shared_file_pointer_=count*datatype->size(); + } + MPI_Offset result; + simgrid::smpi::Colls::scan(&(fh->shared_file_pointer_), &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); + fh->seek(result, MPI_SEEK_SET); + int ret = fh->op_all(buf, count, datatype, status); + if(fh->comm_->rank()==fh->comm_->size()-1){ + fh->lock(); + fh->unlock(); + } + char c; + simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_); + return ret; + } int File::write(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ //get position first as we may be doing non contiguous reads and it will probably be updated badly @@ -93,9 +151,40 @@ namespace smpi{ fh->file_->seek(position+movesize, SEEK_SET); } XBT_DEBUG("Position after write in MPI_File %s : %llu",fh->file_->get_path(), fh->file_->tell()); + status->count=count*datatype->size(); return MPI_SUCCESS; } + int File::write_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ + fh->lock(); + fh->seek(fh->shared_file_pointer_,MPI_SEEK_SET); + write(fh, buf, count, datatype, status); + fh->shared_file_pointer_=fh->file_->tell(); + fh->unlock(); + return MPI_SUCCESS; + } + + int File::write_ordered(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status){ + //0 needs to get the shared pointer value + if(fh->comm_->rank()==0){ + fh->lock(); + fh->unlock(); + }else{ + fh->shared_file_pointer_=count*datatype->size(); + } + MPI_Offset result; + simgrid::smpi::Colls::scan(&(fh->shared_file_pointer_), &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); + fh->seek(result, MPI_SEEK_SET); + int ret = fh->op_all(buf, count, datatype, status); + if(fh->comm_->rank()==fh->comm_->size()-1){ + fh->lock(); + fh->unlock(); + } + char c; + simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_); + return ret; + } + int File::size(){ return file_->size(); } @@ -108,6 +197,55 @@ namespace smpi{ //no idea return simgrid::smpi::Colls::barrier(comm_); } + + int File::lock() +{ + int rank = comm_->rank(); + int size = comm_->size(); + char waitlist[size]; + char lock = 1; + int tag=444; + int i; + win_->lock(MPI_LOCK_EXCLUSIVE, 0, 0); + win_->put(&lock, 1, MPI_CHAR, 0, FP_SIZE+rank, 1, MPI_CHAR); + win_->get(waitlist, size, MPI_CHAR, 0, FP_SIZE, size, MPI_CHAR); + win_->get(&shared_file_pointer_ , 1 , MPI_OFFSET , 0 , 0, 1, MPI_OFFSET); + win_->unlock(0); + for (i = 0; i < size; i++) { + if (waitlist[i] == 1 && i != rank) { + // wait for the lock + MPI_Recv(&lock, 1, MPI_CHAR, MPI_ANY_SOURCE, tag, comm_, MPI_STATUS_IGNORE); + break; + } + } + return 0; +} + +int File::unlock() +{ + int rank = comm_->rank(); + int size = comm_->size(); + char waitlist[size]; + char lock = 0; + int tag=444; + int i, next; + win_->lock(MPI_LOCK_EXCLUSIVE, 0, 0); + win_->put(&lock, 1, MPI_CHAR, 0, FP_SIZE+rank, 1, MPI_CHAR); + win_->get(waitlist, size, MPI_CHAR, 0, FP_SIZE, size, MPI_CHAR); + shared_file_pointer_=file_->tell(); + win_->put(&shared_file_pointer_, 1 , MPI_OFFSET , 0 , 0, 1, MPI_OFFSET); + + win_->unlock(0); + next = (rank + 1 + size) % size; + for (i = 0; i < size; i++, next = (next + 1) % size) { + if (waitlist[next] == 1) { + MPI_Send(&lock, 1, MPI_CHAR, next, tag, comm_); + break; + } + } + return 0; +} + MPI_Info File::info(){ if(info_== MPI_INFO_NULL) info_ = new Info(); -- 2.20.1