3 /* buf trp (transport) - buffered transport using the TCP one */
5 /* Copyright (c) 2004 Martin Quinson. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <string.h> /* memset */
15 #include "xbt/sysdep.h"
17 #include "gras/Transport/transport_private.h"
18 #include "gras/Msg/msg_interface.h" /* listener_close_socket */
20 /* FIXME: maybe READV is sometimes a good thing? */
28 #define MIN(a,b) ((a)<(b)?(a):(b))
31 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(gras_trp_tcp,gras_trp,
32 "TCP buffered transport");
35 *** Specific socket part
/* How data is staged on one direction of a buffered socket:
 *  - buffering_buf: bytes are copied into a flat buffer
 *    (see gras_trp_buf_send() and gras_trp_buf_recv());
 *  - buffering_iov: chunks are chained as struct iovec elements in a dynar
 *    (see gras_trp_iov_send() and gras_trp_iov_recv()). */
38 typedef enum { buffering_buf, buffering_iov } buffering_kind;
43 int pos; /* for receive; not exchanged over the net */
/* Per-socket buffering state; gras_trp_buf_init_sock() allocates one and
 * stores it in sock->bufdata.
 * NOTE(review): other members used by this file (buffsize, in_buf_v, in, out)
 * and the end of the declaration are not visible in this listing. */
47 struct gras_trp_bufdata_{
49 gras_trp_buf_t in_buf; /* received bytes; gras_trp_buf_recv() consumes them from .pos up to .size */
50 gras_trp_buf_t out_buf; /* bytes staged for sending; emptied by gras_trp_bufiov_flush() */
54 xbt_dynar_t out_buf_v; /* struct iovec elements chained for the next writev() */
62 /*****************************/
63 /****[ SOCKET MANAGEMENT ]****/
64 /*****************************/
65 /* The client sends its port number to the server as soon as the socket is
66 created, so the raw send/recv primitives must already be usable here. */
/* Forward declarations: the socket management code below calls these
 * unbuffered primitives before their definitions appear. */
67 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock, const char *data,
68 unsigned long int size);
69 static int gras_trp_tcp_recv(gras_socket_t sock, char *data,
70 unsigned long int size);
/* Protocol number registered for "tcp"; defined with the helper functions
 * further down in this file. */
73 static int _gras_tcp_proto_number(void);
/* Open the client side of a TCP connection described by `sock`.
 *
 * Creates a stream socket, tries to apply sock->buf_size to both kernel
 * buffers, resolves sock->peer_name, connects to sock->peer_port and then
 * sends the local port (myport of the transport process data) as a 32-bit
 * integer in network byte order; gras_trp_sock_socket_accept() reads that
 * value into the peer_port of the accepted socket.
 *
 * The plugin argument is named `ignored`; no visible line of the body uses it.
 * Raises system_error when socket creation, host lookup or connect fails;
 * a failure to resize the buffers is only logged.
 *
 * NOTE(review): this listing is incomplete -- the declaration of `sock`, the
 * declaration of `he` and the tests guarding the first two THROWs are not
 * visible. */
75 static XBT_INLINE void gras_trp_sock_socket_client(gras_trp_plugin_t ignored,
78 struct sockaddr_in addr;
80 struct in_addr *haddr;
81 int size = sock->buf_size;
82 uint32_t myport = htonl(((gras_trp_procdata_t) gras_libdata_by_id(gras_trp_libdata_id))->myport);
84 sock->incoming = 1; /* TCP sockets are duplex'ed */
86 sock->sd = socket (AF_INET, SOCK_STREAM, 0);
89 THROW1(system_error,0, "Failed to create socket: %s", sock_errstr(sock_errno));
/* best effort: a failure to resize the kernel buffers is only logged */
92 if (setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size)) ||
93 setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size))) {
94 VERB1("setsockopt failed, cannot set buffer size: %s",sock_errstr(sock_errno));
/* NOTE(review): gethostbyname() reports failures through h_errno, not errno,
 * so the sock_errno text used in the message below may describe an unrelated
 * error -- confirm and consider hstrerror() or getaddrinfo(). */
97 he = gethostbyname (sock->peer_name);
99 THROW2(system_error,0, "Failed to lookup hostname %s: %s",
100 sock->peer_name, sock_errstr(sock_errno));
/* only the first address returned by the resolver is used */
103 haddr = ((struct in_addr *) (he->h_addr_list)[0]);
105 memset(&addr, 0, sizeof(struct sockaddr_in));
106 memcpy (&addr.sin_addr, haddr, sizeof(struct in_addr));
107 addr.sin_family = AF_INET;
108 addr.sin_port = htons (sock->peer_port);
110 if (connect (sock->sd, (struct sockaddr*) &addr, sizeof (addr)) < 0) {
112 THROW3(system_error,0,
113 "Failed to connect socket to %s:%d (%s)",
114 sock->peer_name, sock->peer_port, sock_errstr(sock_errno));
/* tell the peer which port we use, as 4 bytes in network order */
117 gras_trp_tcp_send(sock,(char*)&myport,sizeof(uint32_t));
118 DEBUG1("peerport sent to %d", sock->peer_port);
120 VERB4("Connect to %s:%d (sd=%d, port %d here)",
121 sock->peer_name, sock->peer_port, sock->sd, sock->port);
125 * gras_trp_sock_socket_server:
127 * Open a socket used to receive messages.
/* Open a listening TCP socket on sock->port, bound to INADDR_ANY.
 *
 * Sets SO_REUSEADDR (failure raises system_error), tries to apply
 * sock->buf_size to both kernel buffers (failure is only logged), then binds
 * and listens with a backlog of 5. Failures of socket(), bind() and listen()
 * raise system_error.
 *
 * The plugin argument is named `ignored`; no visible line of the body uses it.
 * NOTE(review): the declarations of `sock` and `on` are not visible in this
 * listing. */
129 static XBT_INLINE void gras_trp_sock_socket_server(gras_trp_plugin_t ignored,
131 int size = sock->buf_size;
133 struct sockaddr_in server;
135 sock->outgoing = 1; /* TCP => duplex mode */
/* NOTE(review): no visible line zeroes `server` before these assignments
 * (the client side uses memset on its sockaddr_in) -- confirm that the
 * remaining bytes of the structure are initialized. */
137 server.sin_port = htons((u_short)sock->port);
138 server.sin_addr.s_addr = INADDR_ANY;
139 server.sin_family = AF_INET;
140 if((sock->sd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
141 THROW1(system_error,0,"Socket allocation failed: %s", sock_errstr(sock_errno));
/* unlike the buffer sizes below, failing to set SO_REUSEADDR is fatal */
143 if (setsockopt(sock->sd, SOL_SOCKET, SO_REUSEADDR, (char *)&on, sizeof(on)))
144 THROW1(system_error,0,
145 "setsockopt failed, cannot condition the socket: %s",
146 sock_errstr(sock_errno));
/* best effort: a failure to resize the kernel buffers is only logged */
148 if ( setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF,
149 (char *)&size, sizeof(size))
150 || setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF,
151 (char *)&size, sizeof(size))) {
152 VERB1("setsockopt failed, cannot set buffer size: %s",
153 sock_errstr(sock_errno));
156 if (bind(sock->sd, (struct sockaddr *)&server, sizeof(server)) == -1) {
158 THROW2(system_error,0,
159 "Cannot bind to port %d: %s",
160 sock->port, sock_errstr(sock_errno));
163 DEBUG2("Listen on port %d (sd=%d)",sock->port, sock->sd);
/* at most 5 pending connections are queued */
164 if (listen(sock->sd, 5) < 0) {
166 THROW2(system_error,0,
167 "Cannot listen on port %d: %s",
168 sock->port,sock_errstr(sock_errno));
171 VERB2("Openned a server socket on port %d (sd=%d)",sock->port,sock->sd);
/* Accept one incoming connection on the server socket `sock`.
 *
 * Builds a new socket object `res`, accepts the connection, enables
 * SO_KEEPALIVE and TCP_NODELAY on the new descriptor, copies buf_size, plugin,
 * incoming and outgoing from the server socket, reads the 32-bit port sent by
 * the client into res->peer_port, fills res->peer_name (textual address from
 * inet_ntoa(), or "unknown") and pushes `res` onto the process' socket list.
 *
 * Raises system_error when accept() fails (after closing the server socket)
 * or when SO_KEEPALIVE / TCP_NODELAY cannot be set.
 *
 * NOTE(review): several lines are not visible in this listing (declarations
 * of res, sd, tmp_errno, size, i, hisport and tmp; the test guarding the
 * accept failure path; the return statement, presumably returning res). */
174 static gras_socket_t gras_trp_sock_socket_accept(gras_socket_t sock) {
177 struct sockaddr_in peer_in;
178 socklen_t peer_in_len = sizeof(peer_in);
185 socklen_t s = sizeof(int);
190 gras_trp_socket_new(1,&res);
192 sd = accept(sock->sd, (struct sockaddr *)&peer_in, &peer_in_len);
/* saved at once: later calls may overwrite the error code */
193 tmp_errno = sock_errno;
/* accept failure path: the server socket itself is closed before throwing */
196 gras_socket_close(sock);
197 THROW1(system_error,0,
198 "Accept failed (%s). Droping server socket.", sock_errstr(tmp_errno));
/* NOTE(review): the two setsockopt failure messages below print tmp_errno,
 * which was captured right after accept(); the error of the failing
 * setsockopt() call would be in sock_errno -- confirm and fix. */
201 if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, (char *)&i, s)
202 || setsockopt(sd, _gras_tcp_proto_number(), TCP_NODELAY, (char *)&i, s))
203 THROW1(system_error,0,"setsockopt failed, cannot condition the socket: %s",
204 sock_errstr(tmp_errno));
/* the accepted socket inherits the buffer size of the server socket */
206 res->buf_size = sock->buf_size;
207 size = sock->buf_size;
208 if (setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size))
209 || setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size)))
210 VERB1("setsockopt failed, cannot set buffer size: %s", sock_errstr(tmp_errno));
212 res->plugin = sock->plugin;
213 res->incoming = sock->incoming;
214 res->outgoing = sock->outgoing;
/* counterpart of the port sent by gras_trp_sock_socket_client() */
219 gras_trp_tcp_recv(res,(char*)&hisport,sizeof(hisport));
220 res->peer_port = ntohl(hisport);
221 DEBUG1("peerport %d received",res->peer_port);
223 /* FIXME: Lock to protect inet_ntoa */
224 if (((struct sockaddr *)&peer_in)->sa_family != AF_INET) {
225 res->peer_name = (char*)strdup("unknown");
227 struct in_addr addrAsInAddr;
230 addrAsInAddr.s_addr = peer_in.sin_addr.s_addr;
232 tmp = inet_ntoa(addrAsInAddr);
234 res->peer_name = (char*)strdup(tmp);
236 res->peer_name = (char*)strdup("unknown");
240 VERB3("Accepted from %s:%d (sd=%d)", res->peer_name,res->peer_port,sd);
/* register the new socket in the per-process list of sockets */
241 xbt_dynar_push(((gras_trp_procdata_t)
242 gras_libdata_by_id(gras_trp_libdata_id))->sockets,&res);
/* Close the TCP connection behind `sock`.
 * Returns immediately for a NULL socket; otherwise logs the descriptor and
 * passes it to gras_msg_listener_close_socket(). */
248 static void gras_trp_sock_socket_close(gras_socket_t sock){
250 if (!sock) return; /* close only once */
252 VERB1("close tcp connection %d", sock->sd);
254 /* ask the listener to close the socket */
255 gras_msg_listener_close_socket(sock->sd);
257 /************************************/
258 /****[ end of SOCKET MANAGEMENT ]****/
259 /************************************/
262 /************************************/
263 /****[ UNBUFFERED DATA EXCHANGE ]****/
264 /************************************/
265 /* Temptation to merge this with file data exchange is great,
266 but doesn't work on BillWare (see tcp_write() in portable.h) */
/* Unbuffered send: write `size` bytes of `data` on sock->sd with tcp_write().
 *
 * Raises system_error ("write(...) failed") on a write error that is not one
 * of the transient errno values tested below, and system_error ("file
 * descriptor closed") on another path whose condition is not visible.
 *
 * NOTE(review): the loop and branch structure, the declaration of `status`
 * and the `data` parameter line are not visible in this listing; transient
 * errors are presumably retried -- confirm. */
267 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock,
269 unsigned long int size) {
274 status = tcp_write(sock->sd, data, (size_t)size);
/* NOTE(review): `size` is unsigned long but is printed with %ld here and in
 * the THROW4 below. */
275 DEBUG3("write(%d, %p, %ld);", sock->sd, data, size);
/* The next two tests look like alternative variants of the same check
 * (with and without EWOULDBLOCK), presumably selected by the preprocessor.
 * NOTE(review): they read errno while the messages below use sock_errno --
 * confirm both agree on every platform. */
279 if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
281 if (errno == EINTR || errno == EAGAIN)
285 THROW4(system_error,0,"write(%d,%p,%ld) failed: %s",
286 sock->sd, data, size,
287 sock_errstr(sock_errno));
294 THROW1(system_error,0,"file descriptor closed (%s)",
295 sock_errstr(sock_errno));
/* Unbuffered receive into `data` using tcp_read() on sock->sd.
 *
 * Each tcp_read() call asks for `bufsize` bytes at offset `got` in `data`;
 * `size` appears in the error message of the failure path.
 * Raises system_error when tcp_read() fails or when the remote side closed
 * the socket.
 *
 * NOTE(review): the `data` parameter line, the declarations of `got` and
 * `status`, the loop structure and the return statement are not visible in
 * this listing; the return value is presumably the number of bytes received
 * -- confirm. */
299 static XBT_INLINE int
300 gras_trp_tcp_recv_withbuffer(gras_socket_t sock,
302 unsigned long int size,
303 unsigned long int bufsize) {
/* first byte taken from sock->recvd_val; the condition guarding this
 * assignment is not visible here */
308 data[0] = sock->recvd_val;
317 DEBUG5("read(%d, %p, %ld) got %d so far (%s)",
318 sock->sd, data+got, bufsize, got,
319 hexa_str((unsigned char*)data,got,0));
320 status = tcp_read(sock->sd, data+got, (size_t)bufsize);
323 THROW7(system_error,0,"read(%d,%p,%d) from %s:%d failed: %s; got %d so far",
324 sock->sd, data+got, (int)size,
325 gras_socket_peer_name(sock),gras_socket_peer_port(sock),
326 sock_errstr(sock_errno),
329 DEBUG2("Got %d more bytes (%s)",status,hexa_str((unsigned char*)data+got,status,0));
335 THROW1(system_error,errno,"Socket closed by remote side (got %d bytes before this)",
/* Unbuffered receive, installed as the plugin's raw_recv by
 * gras_trp_tcp_setup(): delegates to gras_trp_tcp_recv_withbuffer() with
 * bufsize equal to size and returns its result. */
343 static int gras_trp_tcp_recv(gras_socket_t sock,
345 unsigned long int size) {
346 return gras_trp_tcp_recv_withbuffer(sock,data,size,size);
349 /*******************************************/
350 /****[ end of UNBUFFERED DATA EXCHANGE ]****/
351 /*******************************************/
353 /**********************************/
354 /****[ BUFFERED DATA EXCHANGE ]****/
355 /**********************************/
357 /* Make sure the data is sent.
 *
 * Depending on the buffering kinds recorded in sock->bufdata:
 *  - out == buffering_buf: send the content of out_buf with
 *    gras_trp_tcp_send() and mark it empty;
 *  - out == buffering_iov: hand the chained iovec elements to writev(), reset
 *    the dynar and mark out_buf (which holds copies of non-stable chunks)
 *    empty;
 *  - in == buffering_iov: fill the chained receive chunks with readv() and
 *    reset the dynar.
 *
 * NOTE(review): the return type, the declarations of `vect` and `size` and
 * the closing braces are not visible in this listing. */
359 gras_trp_bufiov_flush(gras_socket_t sock) {
364 gras_trp_bufdata_t *data=sock->bufdata;
368 if (data->out == buffering_buf) {
/* the hexadecimal dump is only produced when debug logging is enabled */
369 if (XBT_LOG_ISENABLED(gras_trp_tcp,xbt_log_priority_debug))
370 hexa_print("chunk to send ",
371 (unsigned char *) data->out_buf.data,data->out_buf.size);
/* out_buf.pos is only set by gras_trp_buf_init_sock() in the visible code,
 * so this test presumably amounts to "the buffer is not empty" */
372 if ((data->out_buf.size - data->out_buf.pos) != 0) {
373 DEBUG3("Send the chunk (size=%d) to %s:%d",data->out_buf.size,
374 gras_socket_peer_name(sock),gras_socket_peer_port(sock));
375 gras_trp_tcp_send(sock, data->out_buf.data, data->out_buf.size);
376 VERB1("Chunk sent (size=%d)",data->out_buf.size);
377 data->out_buf.size = 0;
382 if (data->out == buffering_iov) {
383 DEBUG0("Flush out iov");
384 vect = sock->bufdata->out_buf_v;
385 if ((size = xbt_dynar_length(vect))) {
386 DEBUG1("Flush %d chunks out of this socket",size);
/* NOTE(review): the return value of writev() is ignored, so an error or a
 * short write goes unnoticed and the unsent data is dropped by the reset
 * below -- confirm and handle. */
387 writev(sock->sd,xbt_dynar_get_ptr(vect,0),size);
388 xbt_dynar_reset(vect);
390 data->out_buf.size = 0; /* reset the buffer containing non-stable data */
393 if (data->in == buffering_iov) {
394 DEBUG0("Flush in iov");
395 vect = sock->bufdata->in_buf_v;
396 if ((size = xbt_dynar_length(vect))) {
397 DEBUG1("Get %d chunks from of this socket",size);
/* NOTE(review): same remark for readv(): a short read or an error is not
 * detected. */
398 readv(sock->sd,xbt_dynar_get_ptr(vect,0),size);
399 xbt_dynar_reset(vect);
/* Buffered send: copy `size` bytes of `chunk` into the socket's out_buf,
 * slice by slice, flushing with gras_trp_bufiov_flush() each time the buffer
 * reaches data->buffsize. Data that does not fill the buffer stays there
 * until a later flush.
 *
 * The last parameter is named `stable_ignored`; no visible line uses it.
 * NOTE(review): the return type, the `chunk` parameter line and the
 * declaration of `chunk_pos` are not visible in this listing. */
405 gras_trp_buf_send(gras_socket_t sock,
407 unsigned long int size,
408 int stable_ignored) {
410 gras_trp_bufdata_t *data=(gras_trp_bufdata_t*)sock->bufdata;
415 while (chunk_pos < size) {
416 /* size of the slice to copy into the buffer in this pass: what is left of the chunk, capped by the free space */
417 long int thissize = min(size-chunk_pos,data->buffsize-data->out_buf.size);
/* NOTE(review): the dump below starts at `chunk`, not at chunk + chunk_pos,
 * so after the first pass it does not show the bytes actually copied
 * (debug output only). */
418 DEBUG4("Set the chars %d..%ld into the buffer; size=%ld, ctn=(%s)",
419 (int)data->out_buf.size,
420 ((int)data->out_buf.size) + thissize -1,
422 hexa_str((unsigned char*)chunk,thissize,0));
424 memcpy(data->out_buf.data + data->out_buf.size, chunk + chunk_pos, thissize);
426 data->out_buf.size += thissize;
427 chunk_pos += thissize;
428 DEBUG4("New pos = %d; Still to send = %ld of %ld; ctn sofar=(%s)",
429 data->out_buf.size,size-chunk_pos,size,hexa_str((unsigned char*)chunk,chunk_pos,0));
431 if (data->out_buf.size == data->buffsize) /* out of space. Flush it */
432 gras_trp_bufiov_flush(sock);
/* Buffered receive: fill `chunk` with `size` bytes taken from the socket's
 * in_buf, calling gras_trp_tcp_recv_withbuffer() to refill in_buf whenever it
 * has been entirely consumed (in_buf.size == in_buf.pos).
 *
 * Sets sock->moredata to tell whether unread bytes remain in in_buf.
 *
 * NOTE(review): the return type, the `chunk` parameter line, the declaration
 * of `thissize`, the last argument of the refill call and the statements that
 * update in_buf.size / in_buf.pos after a refill are not visible in this
 * listing. */
439 gras_trp_buf_recv(gras_socket_t sock,
441 unsigned long int size) {
443 gras_trp_bufdata_t *data=sock->bufdata;
444 long int chunk_pos = 0;
448 while (chunk_pos < size) {
449 /* size of the chunk to receive in that shot */
452 if (data->in_buf.size == data->in_buf.pos) { /* out of data. Get more */
454 DEBUG2("Get more data (size=%d,bufsize=%d)",
455 (int)MIN(size-chunk_pos,data->buffsize),
456 (int)data->buffsize);
460 gras_trp_tcp_recv_withbuffer(sock, data->in_buf.data,
461 MIN(size-chunk_pos,data->buffsize),
/* hand over as much as is both still wanted and currently buffered */
467 thissize = min(size-chunk_pos , data->in_buf.size - data->in_buf.pos);
468 memcpy(chunk+chunk_pos, data->in_buf.data + data->in_buf.pos, thissize);
470 data->in_buf.pos += thissize;
471 chunk_pos += thissize;
472 DEBUG4("New pos = %d; Still to receive = %ld of %ld. Ctn so far=(%s)",
473 data->in_buf.pos,size - chunk_pos,size,hexa_str((unsigned char*)chunk,chunk_pos,0));
475 /* tell the gras_select function whether buffered data is still pending on this socket, so that it does not actually select on it */
476 sock->moredata = (data->in_buf.size > data->in_buf.pos);
477 DEBUG1("There is %smore data",(sock->moredata?"":"no "));
483 /*****************************************/
484 /****[ end of BUFFERED DATA EXCHANGE ]****/
485 /*****************************************/
487 /********************************/
488 /****[ VECTOR DATA EXCHANGE ]****/
489 /********************************/
/* Vector send: queue `chunk` (`size` bytes) as a struct iovec element in
 * out_buf_v for a later writev() done by gras_trp_bufiov_flush().
 *
 * According to the comments in the body, three cases are handled:
 *  - storage not stable and too big for the free space of out_buf: the chunk
 *    is chained as is and the socket is flushed at once;
 *  - storage not stable but small enough: the bytes are copied into out_buf
 *    and the copy is chained;
 *  - storage stable: the chunk is chained directly.
 *
 * NOTE(review): the return type, the `chunk` and stability parameter lines,
 * the declaration of `elm` and the tests selecting between the three cases
 * are not visible in this listing. */
492 gras_trp_iov_send(gras_socket_t sock,
494 unsigned long int size,
497 gras_trp_bufdata_t *data=(gras_trp_bufdata_t*)sock->bufdata;
500 DEBUG1("Buffer one chunk to be sent later (%s)",
501 hexa_str((char*)chunk,size,0));
503 elm.iov_len = (size_t)size;
506 /* data storage won't last until flush. Save it in a buffer if we can */
508 if (size > data->buffsize-data->out_buf.size) {
510 flush the socket, using data in its actual storage */
511 elm.iov_base = (void*)chunk;
512 xbt_dynar_push(data->out_buf_v,&elm);
514 gras_trp_bufiov_flush(sock);
517 /* buffer big enough:
518 copy data into it, and chain it for upcoming writev */
519 memcpy(data->out_buf.data + data->out_buf.size, chunk, size);
520 elm.iov_base = (void*)(data->out_buf.data + data->out_buf.size);
521 data->out_buf.size += size;
523 xbt_dynar_push(data->out_buf_v,&elm);
527 /* data storage stable. Chain it */
529 elm.iov_base = (void*)chunk;
530 xbt_dynar_push(data->out_buf_v,&elm);
/* Vector receive: record `chunk` (`size` bytes) as a destination in
 * in_buf_v; nothing is read here, the readv() happens in
 * gras_trp_bufiov_flush() when the input side uses buffering_iov.
 *
 * NOTE(review): gras_trp_tcp_setup() installs gras_trp_buf_recv as plug->recv
 * and gras_trp_buf_init_sock() sets data->in to buffering_buf, so nothing
 * visible in this listing enables this path -- confirm whether it is used.
 * The return type, the `chunk` parameter line, the declaration of `elm` and
 * the end of the function are not visible either. */
534 gras_trp_iov_recv(gras_socket_t sock,
536 unsigned long int size) {
539 DEBUG0("Buffer one chunk to be received later");
540 elm.iov_base = (void*)chunk;
541 elm.iov_len = (size_t)size;
542 xbt_dynar_push(sock->bufdata->in_buf_v,&elm);
548 /***************************************/
549 /****[ end of VECTOR DATA EXCHANGE ]****/
550 /***************************************/
554 *** Prototypes of BUFFERED
/* Declarations of the buffered socket entry points that
 * gras_trp_tcp_setup() installs in the plugin; they are defined after it.
 * NOTE(review): the second line of the first two prototypes is not visible
 * in this listing. */
557 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
559 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
561 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock);
563 void gras_trp_buf_socket_close(gras_socket_t sd); /* the parameter is named sock in the definition */
/* Attach fresh buffering state to `sock`.
 *
 * Allocates a gras_trp_bufdata_t with two empty buffers of data->buffsize
 * bytes (100 * 1024) each, creates the two iovec dynars, selects the
 * buffering kind of each direction and stores the result in sock->bufdata.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * gras_trp_buf_socket_accept() returns this function's result, so it
 * presumably returns `sock` -- confirm. */
566 gras_socket_t gras_trp_buf_init_sock(gras_socket_t sock) {
567 gras_trp_bufdata_t *data=xbt_new(gras_trp_bufdata_t,1);
569 data->buffsize = 100 * 1024 ; /* 100k */
571 data->in_buf.size = 0;
572 data->in_buf.data = xbt_malloc(data->buffsize);
573 data->in_buf.pos = 0; /* useless, indeed, since size==pos */
575 data->out_buf.size = 0;
576 data->out_buf.data = xbt_malloc(data->buffsize);
577 data->out_buf.pos = data->out_buf.size;
/* each dynar element is one struct iovec, ready to be passed to
 * writev() / readv() by gras_trp_bufiov_flush() */
580 data->in_buf_v = data->out_buf_v = NULL;
581 data->in_buf_v=xbt_dynar_new(sizeof(struct iovec),NULL);
582 data->out_buf_v=xbt_dynar_new(sizeof(struct iovec),NULL);
/* NOTE(review): data->out is assigned twice in a row in the visible lines;
 * these are presumably alternative preprocessor branches whose directives
 * are not visible -- confirm. */
583 data->out = buffering_iov;
585 data->out = buffering_buf;
588 data->in = buffering_buf;
590 sock->bufdata = data;
/* Fill the plugin descriptor `plug` with the callbacks of this transport:
 * buffered socket management, buffered or vector send, buffered receive,
 * unbuffered raw send/receive and flush.
 *
 * NOTE(review): the return type and the end of the function are not visible
 * in this listing. */
598 gras_trp_tcp_setup(gras_trp_plugin_t plug) {
600 plug->socket_client = gras_trp_buf_socket_client;
601 plug->socket_server = gras_trp_buf_socket_server;
602 plug->socket_accept = gras_trp_buf_socket_accept;
603 plug->socket_close = gras_trp_buf_socket_close;
/* NOTE(review): plug->send is assigned twice in a row in the visible lines;
 * these are presumably alternative preprocessor branches (vector versus flat
 * buffering) whose directives are not visible -- confirm. */
606 plug->send = gras_trp_iov_send;
608 plug->send = gras_trp_buf_send;
610 plug->recv = gras_trp_buf_recv;
/* raw variants bypass the buffers and talk to the descriptor directly */
612 plug->raw_send = gras_trp_tcp_send;
613 plug->raw_recv = gras_trp_tcp_recv;
615 plug->flush = gras_trp_bufiov_flush;
/* Buffered client socket: open the TCP connection with
 * gras_trp_sock_socket_client() and then attach buffering state with
 * gras_trp_buf_init_sock(). `self` is not forwarded (NULL is passed as the
 * plugin argument). */
621 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
622 /* OUT */ gras_socket_t sock){
624 gras_trp_sock_socket_client(NULL,sock);
625 gras_trp_buf_init_sock(sock);
629 * gras_trp_buf_socket_server:
631 * Open a socket used to receive messages.
/* Buffered server socket: open the listening socket with
 * gras_trp_sock_socket_server() and then attach buffering state with
 * gras_trp_buf_init_sock(). `self` is not forwarded (NULL is passed as the
 * plugin argument). */
633 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
634 /* OUT */ gras_socket_t sock){
636 gras_trp_sock_socket_server(NULL,sock);
637 gras_trp_buf_init_sock(sock);
/* Buffered accept: accept a connection on `sock` with
 * gras_trp_sock_socket_accept(), attach buffering state to the new socket
 * and return what gras_trp_buf_init_sock() returns. */
640 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock) {
641 return gras_trp_buf_init_sock(gras_trp_sock_socket_accept(sock));
/* Close a buffered socket.
 *
 * In this order: warn if received bytes were never consumed and free the
 * input buffer; flush pending output and free the output buffer; warn about
 * / flush what is still chained in the iovec dynars and free them; finally
 * close the TCP connection with gras_trp_sock_socket_close().
 *
 * NOTE(review): `sock` is dereferenced on the first line although
 * gras_trp_sock_socket_close() accepts a NULL socket; whether `data` itself
 * is released is not visible in this listing -- confirm both. */
644 void gras_trp_buf_socket_close(gras_socket_t sock){
645 gras_trp_bufdata_t *data=sock->bufdata;
647 if (data->in_buf.size!=data->in_buf.pos) {
648 WARN3("Socket closed, but %d bytes were unread (size=%d,pos=%d)",
649 data->in_buf.size - data->in_buf.pos,
650 data->in_buf.size, data->in_buf.pos);
/* the NULL test is redundant: free(NULL) is a no-op */
652 if (data->in_buf.data)
653 free(data->in_buf.data);
/* the output buffer must be flushed before its storage is freed below */
655 if (data->out_buf.size!=data->out_buf.pos) {
656 DEBUG2("Flush the socket before closing (in=%d,out=%d)",
657 data->in_buf.size, data->out_buf.size);
658 gras_trp_bufiov_flush(sock);
660 if (data->out_buf.data)
661 free(data->out_buf.data);
664 if (data->in_buf_v) {
665 if (xbt_dynar_length(data->in_buf_v))
666 WARN0("Socket closed, but some bytes were unread");
667 xbt_dynar_free(&data->in_buf_v);
669 if (data->out_buf_v) {
670 if (xbt_dynar_length(data->out_buf_v)) {
671 DEBUG0("Flush the socket before closing");
672 gras_trp_bufiov_flush(sock);
674 xbt_dynar_free(&data->out_buf_v);
679 gras_trp_sock_socket_close(sock);
682 /****************************/
683 /****[ HELPER FUNCTIONS ]****/
684 /****************************/
687 * Returns the tcp protocol number from the network protocol data base.
689 * getprotobyname() is not thread safe. We need to lock it.
/* Look up the protocol number of "tcp" with getprotobyname() and cache it in
 * a function-local static, so the lookup only happens while the cached value
 * is still 0. Asserts that the lookup succeeded.
 *
 * NOTE(review): no locking is visible around getprotobyname() or the cached
 * value; the return statement (presumably returning the cached value) is not
 * visible in this listing. */
691 static int _gras_tcp_proto_number(void) {
692 struct protoent *fetchedEntry;
693 static int returnValue = 0;
695 if(returnValue == 0) {
696 fetchedEntry = getprotobyname("tcp");
697 xbt_assert0(fetchedEntry, "getprotobyname(tcp) gave NULL");
698 returnValue = fetchedEntry->p_proto;
704 #ifdef HAVE_WINSOCK_H
/* Expands to a switch case that returns the name of the constant `x` as a
 * string literal (through the # stringification operator). */
705 #define RETSTR( x ) case x: return #x
/* Translate a Winsock error code into the name of the matching WSA*
 * constant; codes that match no listed constant yield "unknown WSA error".
 * Only compiled when HAVE_WINSOCK_H is defined.
 *
 * NOTE(review): the switch statement itself and a few entries are not
 * visible in this listing. */
707 const char *gras_wsa_err2string( int err ) {
715 RETSTR( WSAEWOULDBLOCK );
716 RETSTR( WSAEINPROGRESS );
717 RETSTR( WSAEALREADY );
718 RETSTR( WSAENOTSOCK );
719 RETSTR( WSAEDESTADDRREQ );
720 RETSTR( WSAEMSGSIZE );
721 RETSTR( WSAEPROTOTYPE );
722 RETSTR( WSAENOPROTOOPT );
723 RETSTR( WSAEPROTONOSUPPORT );
724 RETSTR( WSAESOCKTNOSUPPORT );
725 RETSTR( WSAEOPNOTSUPP );
726 RETSTR( WSAEPFNOSUPPORT );
727 RETSTR( WSAEAFNOSUPPORT );
728 RETSTR( WSAEADDRINUSE );
729 RETSTR( WSAEADDRNOTAVAIL );
730 RETSTR( WSAENETDOWN );
731 RETSTR( WSAENETUNREACH );
732 RETSTR( WSAENETRESET );
733 RETSTR( WSAECONNABORTED );
734 RETSTR( WSAECONNRESET );
735 RETSTR( WSAENOBUFS );
736 RETSTR( WSAEISCONN );
737 RETSTR( WSAENOTCONN );
738 RETSTR( WSAESHUTDOWN );
739 RETSTR( WSAETOOMANYREFS );
740 RETSTR( WSAETIMEDOUT );
741 RETSTR( WSAECONNREFUSED );
743 RETSTR( WSAENAMETOOLONG );
744 RETSTR( WSAEHOSTDOWN );
745 RETSTR( WSAEHOSTUNREACH );
746 RETSTR( WSAENOTEMPTY );
747 RETSTR( WSAEPROCLIM );
751 RETSTR( WSAEREMOTE );
752 RETSTR( WSASYSNOTREADY );
753 RETSTR( WSAVERNOTSUPPORTED );
754 RETSTR( WSANOTINITIALISED );
755 RETSTR( WSAEDISCON );
758 RETSTR( WSAENOMORE );
759 RETSTR( WSAECANCELLED );
760 RETSTR( WSAEINVALIDPROCTABLE );
761 RETSTR( WSAEINVALIDPROVIDER );
762 RETSTR( WSASYSCALLFAILURE );
763 RETSTR( WSASERVICE_NOT_FOUND );
764 RETSTR( WSATYPE_NOT_FOUND );
765 RETSTR( WSA_E_NO_MORE );
766 RETSTR( WSA_E_CANCELLED );
767 RETSTR( WSAEREFUSED );
/* closes a conditional section whose opening directive is not visible in
 * this listing */
768 #endif /* HAVE_WINSOCK2 */
770 RETSTR( WSAHOST_NOT_FOUND );
771 RETSTR( WSATRY_AGAIN );
772 RETSTR( WSANO_RECOVERY );
773 RETSTR( WSANO_DATA );
775 return "unknown WSA error";
777 #endif /* HAVE_WINSOCK_H */
779 /***********************************/
780 /****[ end of HELPER FUNCTIONS ]****/
781 /***********************************/