3 /* buf trp (transport) - buffered transport using the TCP one */
5 /* Copyright (c) 2004 Martin Quinson. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <string.h> /* memset */
15 #include "xbt/sysdep.h"
17 #include "transport_private.h"
19 /* FIXME: maybe READV is sometimes a good thing? */
/* Smaller of a and b. This is a function-like macro, so the selected
 * argument is evaluated twice: do not pass expressions with side effects. */
27 #define MIN(a,b) ((a)<(b)?(a):(b))
/* Declares the log category gras_trp_tcp as a subcategory of gras_trp.
 * Presumably it becomes the default category of the DEBUGn, VERBn and WARNn
 * calls of this file (TODO confirm against the xbt log documentation). */
30 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(gras_trp_tcp,gras_trp,
31 "TCP buffered transport");
34 *** Specific socket part
/* Buffering strategy of one direction of a socket: with buffering_buf the
 * bytes go through a flat buffer, with buffering_iov the chunks are chained
 * in a vector of struct iovec (see gras_trp_bufiov_flush). */
37 typedef enum { buffering_buf, buffering_iov } buffering_kind;
/* Read cursor of a buffer: the receive path copies data out starting at
 * this offset and advances it. */
42 int pos; /* for receive; not exchanged over the net */
/* Per-socket buffering state; an instance is stored in sock->bufdata. */
46 struct gras_trp_bufdata_{
/* Flat buffers used for reception and for emission respectively. */
48 gras_trp_buf_t in_buf;
49 gras_trp_buf_t out_buf;
/* Chunks waiting to be written; created as a dynar of struct iovec. */
53 xbt_dynar_t out_buf_v;
61 /*****************************/
62 /****[ SOCKET MANAGEMENT ]****/
63 /*****************************/
64 /* Forward declarations. The port number is exchanged as soon as a socket is
65 created (the client sends it, accept reads it), so raw send and recv are needed early. */
66 static inline void gras_trp_tcp_send(gras_socket_t sock, const char *data,
67 unsigned long int size);
68 static int gras_trp_tcp_recv(gras_socket_t sock, char *data,
69 unsigned long int size);
/* Helper defined in the HELPER FUNCTIONS section; the accept path uses it
 * as the level argument when setting TCP_NODELAY. */
72 static int _gras_tcp_proto_number(void);
/* Open a client TCP connection to sock->peer_name:sock->peer_port.
 *
 * Steps visible here: create an AF_INET stream socket, request send and
 * receive buffers of sock->buf_size bytes, resolve the peer name, connect,
 * then send the port of this process to the peer over the raw channel.
 *
 * The first parameter is named ignored and no visible line reads it.
 * Errors: the THROWn(system_error, ...) calls carry the messages for socket
 * creation, hostname lookup and connection failures; a failing buffer-size
 * setsockopt only produces a warning. */
74 static inline void gras_trp_sock_socket_client(gras_trp_plugin_t ignored,
77 struct sockaddr_in addr;
79 struct in_addr *haddr;
80 int size = sock->buf_size;
/* Own port, converted to network byte order as a 32-bit value; it is what
 * gets sent to the peer once connected. */
81 uint32_t myport = htonl(((gras_trp_procdata_t) gras_libdata_by_id(gras_trp_libdata_id))->myport);
83 sock->incoming = 1; /* TCP sockets are duplex'ed */
85 sock->sd = socket (AF_INET, SOCK_STREAM, 0);
88 THROW1(system_error,0, "Failed to create socket: %s", sock_errstr);
/* Best effort: both kernel buffers are asked to be sock->buf_size bytes. */
91 if (setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size)) ||
92 setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size))) {
93 WARN1("setsockopt failed, cannot set buffer size: %s",sock_errstr);
/* NOTE(review): the declaration of he is not visible in this view; it is
 * presumably a struct hostent pointer. Confirm. */
96 he = gethostbyname (sock->peer_name);
98 THROW2(system_error,0, "Failed to lookup hostname %s: %s",
99 sock->peer_name, sock_errstr);
/* Only the first address of the lookup result is used. */
102 haddr = ((struct in_addr *) (he->h_addr_list)[0]);
104 memset(&addr, 0, sizeof(struct sockaddr_in));
105 memcpy (&addr.sin_addr, haddr, sizeof(struct in_addr));
106 addr.sin_family = AF_INET;
107 addr.sin_port = htons (sock->peer_port);
109 if (connect (sock->sd, (struct sockaddr*) &addr, sizeof (addr)) < 0) {
111 THROW3(system_error,0,
112 "Failed to connect socket to %s:%d (%s)",
113 sock->peer_name, sock->peer_port, sock_errstr);
/* Tell the peer which port this process uses; the accept side reads these
 * four bytes right after accepting the connection. */
116 gras_trp_tcp_send(sock,(char*)&myport,sizeof(uint32_t));
117 DEBUG1("peerport sent to %d", sock->peer_port);
119 VERB4("Connect to %s:%d (sd=%d, port %d here)",
120 sock->peer_name, sock->peer_port, sock->sd, sock->port);
124 * gras_trp_sock_socket_server:
126 * Open a socket used to receive messages.
/* Open a listening TCP socket on sock->port, bound to INADDR_ANY.
 *
 * Steps visible here: create the socket, set SO_REUSEADDR, request send and
 * receive buffers of sock->buf_size bytes, bind, then listen with a backlog
 * of 5.
 *
 * The first parameter is named ignored and no visible line reads it.
 * Errors: socket creation, SO_REUSEADDR, bind and listen failures are
 * raised with THROWn(system_error, ...); a failing buffer-size setsockopt
 * only produces a warning. */
128 static inline void gras_trp_sock_socket_server(gras_trp_plugin_t ignored,
130 int size = sock->buf_size;
132 struct sockaddr_in server;
134 sock->outgoing = 1; /* TCP => duplex mode */
/* NOTE(review): only these three fields of server are assigned in the
 * visible lines; unlike the client path no memset is visible, so the
 * padding bytes may be left uninitialised. Confirm. */
136 server.sin_port = htons((u_short)sock->port);
137 server.sin_addr.s_addr = INADDR_ANY;
138 server.sin_family = AF_INET;
139 if((sock->sd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
140 THROW1(system_error,0,"Socket allocation failed: %s", sock_errstr);
/* NOTE(review): on is not declared in the visible lines; presumably an int
 * set to 1. Confirm. */
142 if (setsockopt(sock->sd, SOL_SOCKET, SO_REUSEADDR, (char *)&on, sizeof(on)))
143 THROW1(system_error,0,"setsockopt failed, cannot condition the socket: %s",
146 if (setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size)) ||
147 setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size))) {
148 WARN1("setsockopt failed, cannot set buffer size: %s",
152 if (bind(sock->sd, (struct sockaddr *)&server, sizeof(server)) == -1) {
154 THROW2(system_error,0,"Cannot bind to port %d: %s",sock->port, sock_errstr);
157 DEBUG2("Listen on port %d (sd=%d)",sock->port, sock->sd);
158 if (listen(sock->sd, 5) < 0) {
160 THROW2(system_error,0,"Cannot listen on port %d: %s",sock->port,sock_errstr);
163 VERB2("Openned a server socket on port %d (sd=%d)",sock->port,sock->sd);
/* Accept one connection on the listening socket sock and describe it in a
 * freshly created socket object res.
 *
 * Steps visible here: accept(), enable SO_KEEPALIVE and TCP_NODELAY on the
 * new descriptor, request buffers of sock->buf_size bytes, copy plugin and
 * direction flags from the server socket, read the port announced by the
 * peer, and record the peer address as a string.
 *
 * Errors: the accept failure path closes the server socket before raising
 * system_error; a failing keepalive or nodelay setsockopt also raises; a
 * failing buffer-size setsockopt only warns.
 * res->peer_name is always a strdup copy. */
166 static gras_socket_t gras_trp_sock_socket_accept(gras_socket_t sock) {
169 struct sockaddr_in peer_in;
170 socklen_t peer_in_len = sizeof(peer_in);
177 socklen_t s = sizeof(int);
182 gras_trp_socket_new(1,&res);
184 sd = accept(sock->sd, (struct sockaddr *)&peer_in, &peer_in_len);
/* NOTE(review): res was already created above and no visible line releases
 * it on this failure path. Confirm whether it leaks. */
188 gras_socket_close(sock);
189 THROW1(system_error,0,
190 "Accept failed (%s). Droping server socket.", sock_errstr);
193 if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, (char *)&i, s)
194 || setsockopt(sd, _gras_tcp_proto_number(), TCP_NODELAY, (char *)&i, s))
195 THROW1(system_error,0,"setsockopt failed, cannot condition the socket: %s",
198 res->buf_size = sock->buf_size;
199 size = sock->buf_size;
200 if (setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&size, sizeof(size))
201 || setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&size, sizeof(size)))
202 WARN1("setsockopt failed, cannot set buffer size: %s", sock_errstr);
204 res->plugin = sock->plugin;
205 res->incoming = sock->incoming;
206 res->outgoing = sock->outgoing;
/* The client sends its own port as a 32-bit value in network byte order
 * right after connecting; read it back here.
 * NOTE(review): this raw receive uses the descriptor stored in res, but no
 * visible line stores sd into res before this point. Confirm. */
211 gras_trp_tcp_recv(res,(char*)&hisport,sizeof(hisport));
212 res->peer_port = ntohl(hisport);
213 DEBUG1("peerport %d received",res->peer_port);
215 /* FIXME: Lock to protect inet_ntoa */
/* A peer address that is not AF_INET gets the placeholder name. */
216 if (((struct sockaddr *)&peer_in)->sa_family != AF_INET) {
217 res->peer_name = (char*)strdup("unknown");
219 struct in_addr addrAsInAddr;
222 addrAsInAddr.s_addr = peer_in.sin_addr.s_addr;
224 tmp = inet_ntoa(addrAsInAddr);
226 res->peer_name = (char*)strdup(tmp);
228 res->peer_name = (char*)strdup("unknown");
232 VERB3("Accepted from %s:%d (sd=%d)", res->peer_name,res->peer_port,sd);
/* Close the TCP descriptor of sock.
 *
 * A NULL socket is ignored. A failing tcp_close() is only reported with a
 * warning in the visible lines. The block that follows the FIXME marker is
 * a disabled shutdown() sequence kept as a comment. */
238 static void gras_trp_sock_socket_close(gras_socket_t sock){
240 if (!sock) return; /* close only once */
242 VERB1("close tcp connection %d", sock->sd);
244 /* FIXME: no pipe in GRAS so far
245 if(!FD_ISSET(sd, &connectedPipes)) {
246 if(shutdown(sd, 2) < 0) {
249 ReleaseNWSLock(&lock);
251 / * The other side may have beaten us to the reset. * /
252 if ((tmp_errno!=ENOTCONN) && (tmp_errno!=ECONNRESET)) {
253 WARN1("CloseSocket: shutdown error %d\n", tmp_errno);
259 /* close the socket */
/* The warning reports the descriptor, the numeric error and its text. */
260 if(tcp_close(sock->sd) < 0) {
261 WARN3("error while closing tcp socket %d: %d (%s)\n",
262 sock->sd, sock_errno, sock_errstr);
266 /************************************/
267 /****[ end of SOCKET MANAGEMENT ]****/
268 /************************************/
271 /************************************/
272 /****[ UNBUFFERED DATA EXCHANGE ]****/
273 /************************************/
274 /* Temptation to merge this with file data exchange is great,
275 but doesn't work on BillWare (see tcp_write() in portable.h) */
/* Raw (unbuffered) send of size bytes starting at data on sock->sd, using
 * tcp_write().
 *
 * The visible lines single out EINTR and EAGAIN (plus EWOULDBLOCK in one
 * variant), presumably to retry, and raise system_error for other write
 * failures and for a closed descriptor.
 * NOTE(review): the two errno tests look like alternative branches of a
 * conditional compilation, and the loop that resumes partial writes is not
 * visible in this view. Confirm both. */
276 static inline void gras_trp_tcp_send(gras_socket_t sock,
278 unsigned long int size) {
283 status = tcp_write(sock->sd, data, (size_t)size);
284 DEBUG3("write(%d, %p, %ld);", sock->sd, data, size);
288 if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
290 if (errno == EINTR || errno == EAGAIN)
294 THROW4(system_error,0,"write(%d,%p,%ld) failed: %s",
295 sock->sd, data, size,
303 THROW1(system_error,0,"file descriptor closed (%s)",
/* Raw receive into data with tcp_read(), tracking the number of bytes
 * obtained so far in got.
 *
 * size is the amount reported in the error message; bufsize is the amount
 * requested from each tcp_read() call.
 * Errors: system_error is raised when the read fails and when the remote
 * side closed the socket; both messages include the byte count so far.
 * NOTE(review): each read asks for bufsize bytes at data+got. Whether
 * bufsize shrinks as data arrives is not visible here; if it does not, a
 * second read could write past the end of the buffer. Confirm. */
309 gras_trp_tcp_recv_withbuffer(gras_socket_t sock,
311 unsigned long int size,
312 unsigned long int bufsize) {
319 DEBUG5("read(%d, %p, %ld) got %d so far (%s)",
320 sock->sd, data+got, bufsize, got,
321 hexa_str((unsigned char*)data,got));
322 status = tcp_read(sock->sd, data+got, (size_t)bufsize);
325 THROW7(system_error,0,"read(%d,%p,%d) from %s:%d failed: %s; got %d so far",
326 sock->sd, data+got, (int)size,
327 gras_socket_peer_name(sock),gras_socket_peer_port(sock),
331 DEBUG2("Got %d more bytes (%s)",status,hexa_str((unsigned char*)data+got,status));
337 THROW1(system_error,0,"Socket closed by remote side (got %d bytes before this)",
/* Raw (unbuffered) receive of size bytes into data. Delegates to
 * gras_trp_tcp_recv_withbuffer() with bufsize equal to size and returns
 * whatever that call returns. */
344 static int gras_trp_tcp_recv(gras_socket_t sock,
346 unsigned long int size) {
347 return gras_trp_tcp_recv_withbuffer(sock,data,size,size);
350 /*******************************************/
351 /****[ end of UNBUFFERED DATA EXCHANGE ]****/
352 /*******************************************/
354 /**********************************/
355 /****[ BUFFERED DATA EXCHANGE ]****/
356 /**********************************/
358 /* Make sure the data is sent: flush whatever this socket has pending. */
/* Three steps, each guarded by the mode stored in sock->bufdata:
 *  - out is buffering_buf: when size differs from pos, send the flat
 *    out_buf with the raw TCP send, then reset its size to 0;
 *  - out is buffering_iov: writev() all chained chunks, then reset the
 *    vector and out_buf.size;
 *  - in is buffering_iov: readv() into all registered chunks, then reset
 *    the vector. */
360 gras_trp_bufiov_flush(gras_socket_t sock) {
365 gras_trp_bufdata_t *data=sock->bufdata;
369 if (data->out == buffering_buf) {
/* The hexadecimal dump is only produced when debug logging is enabled. */
370 if (XBT_LOG_ISENABLED(gras_trp_tcp,xbt_log_priority_debug))
371 hexa_print("chunk to send ",
372 (unsigned char *) data->out_buf.data,data->out_buf.size);
/* pos is only used to detect pending data; the send below always starts
 * at offset 0 and covers size bytes. */
373 if ((data->out_buf.size - data->out_buf.pos) != 0) {
374 DEBUG3("Send the chunk (size=%d) to %s:%d",data->out_buf.size,
375 gras_socket_peer_name(sock),gras_socket_peer_port(sock));
376 gras_trp_tcp_send(sock, data->out_buf.data, data->out_buf.size);
377 VERB1("Chunk sent (size=%d)",data->out_buf.size);
378 data->out_buf.size = 0;
383 if (data->out == buffering_iov) {
384 DEBUG0("Flush out iov");
385 vect = sock->bufdata->out_buf_v;
386 if ((size = xbt_dynar_length(vect))) {
387 DEBUG1("Flush %d chunks out of this socket",size);
/* NOTE(review): the result of writev() is discarded, so a failed or short
 * write goes unnoticed and the reset below drops the unsent chunks. */
388 writev(sock->sd,xbt_dynar_get_ptr(vect,0),size);
389 xbt_dynar_reset(vect);
391 data->out_buf.size = 0; /* reset the buffer containing non-stable data */
394 if (data->in == buffering_iov) {
395 DEBUG0("Flush in iov");
396 vect = sock->bufdata->in_buf_v;
397 if ((size = xbt_dynar_length(vect))) {
398 DEBUG1("Get %d chunks from of this socket",size);
/* NOTE(review): the result of readv() is discarded as well, so a short
 * read leaves some registered chunks unfilled without any error. */
399 readv(sock->sd,xbt_dynar_get_ptr(vect,0),size);
400 xbt_dynar_reset(vect);
/* Buffered send: copy size bytes of chunk into the flat output buffer,
 * flushing the socket each time the buffer becomes full.
 *
 * Nothing is sent by this function while the buffer still has room; the
 * data then waits for a later flush. The last parameter is named
 * stable_ignored and no visible line reads it. */
406 gras_trp_buf_send(gras_socket_t sock,
408 unsigned long int size,
409 int stable_ignored) {
411 gras_trp_bufdata_t *data=(gras_trp_bufdata_t*)sock->bufdata;
416 while (chunk_pos < size) {
417 /* size of the piece copied into the buffer in this pass */
/* Bounded by what is left of the chunk and by the free room in out_buf.
 * NOTE(review): lowercase min is used here while the macro visible at the
 * top of the file is MIN; confirm min is defined elsewhere. */
418 long int thissize = min(size-chunk_pos,data->buffsize-data->out_buf.size);
419 DEBUG4("Set the chars %d..%ld into the buffer; size=%ld, ctn=(%s)",
420 (int)data->out_buf.size,
421 ((int)data->out_buf.size) + thissize -1,
423 hexa_str((unsigned char*)chunk,thissize));
425 memcpy(data->out_buf.data + data->out_buf.size, chunk + chunk_pos, thissize);
427 data->out_buf.size += thissize;
428 chunk_pos += thissize;
429 DEBUG4("New pos = %d; Still to send = %ld of %ld; ctn sofar=(%s)",
430 data->out_buf.size,size-chunk_pos,size,hexa_str((unsigned char*)chunk,chunk_pos));
432 if (data->out_buf.size == data->buffsize) /* out of space. Flush it */
433 gras_trp_bufiov_flush(sock);
/* Buffered receive: fill chunk with size bytes taken from the flat input
 * buffer, refilling that buffer from the socket whenever it is exhausted
 * (size equal to pos).
 *
 * Each refill asks the raw receive for at most buffsize bytes and never
 * more than what is still missing from the chunk. */
440 gras_trp_buf_recv(gras_socket_t sock,
442 unsigned long int size) {
444 gras_trp_bufdata_t *data=sock->bufdata;
/* NOTE(review): chunk_pos is signed while size is unsigned, so the loop
 * condition below compares them as unsigned values. */
445 long int chunk_pos = 0;
449 while (chunk_pos < size) {
450 /* size of the chunk to receive in that shot */
453 if (data->in_buf.size == data->in_buf.pos) { /* out of data. Get more */
455 DEBUG2("Get more data (size=%d,bufsize=%d)",
456 (int)MIN(size-chunk_pos,data->buffsize),
457 (int)data->buffsize);
461 gras_trp_tcp_recv_withbuffer(sock, data->in_buf.data,
462 MIN(size-chunk_pos,data->buffsize),
/* Copy out no more than what is buffered and no more than what is missing.
 * NOTE(review): lowercase min is used here while the macro visible at the
 * top of the file is MIN; confirm min is defined elsewhere. */
468 thissize = min(size-chunk_pos , data->in_buf.size - data->in_buf.pos);
469 memcpy(chunk+chunk_pos, data->in_buf.data + data->in_buf.pos, thissize);
471 data->in_buf.pos += thissize;
472 chunk_pos += thissize;
473 DEBUG4("New pos = %d; Still to receive = %ld of %ld. Ctn so far=(%s)",
474 data->in_buf.pos,size - chunk_pos,size,hexa_str((unsigned char*)chunk,chunk_pos));
481 /*****************************************/
482 /****[ end of BUFFERED DATA EXCHANGE ]****/
483 /*****************************************/
485 /********************************/
486 /****[ VECTOR DATA EXCHANGE ]****/
487 /********************************/
/* Vector send: queue one chunk as a struct iovec in out_buf_v so that a
 * later flush writes it with writev().
 *
 * According to the inline comments, three cases are handled:
 *  - storage not stable and larger than the free room of out_buf: chain
 *    the chunk in place and flush the socket at once;
 *  - storage not stable but small enough: copy it into out_buf and chain
 *    the copy;
 *  - storage stable: chain the chunk in place.
 * NOTE(review): the tests selecting between the stable and unstable cases
 * are not visible in this view. Confirm against the full source. */
490 gras_trp_iov_send(gras_socket_t sock,
492 unsigned long int size,
495 gras_trp_bufdata_t *data=(gras_trp_bufdata_t*)sock->bufdata;
498 DEBUG1("Buffer one chunk to be sent later (%s)",
499 hexa_str((char*)chunk,size));
501 elm.iov_len = (size_t)size;
504 /* data storage won't last until flush. Save it in a buffer if we can */
506 if (size > data->buffsize-data->out_buf.size) {
508 flush the socket, using data in its actual storage */
509 elm.iov_base = (void*)chunk;
510 xbt_dynar_push(data->out_buf_v,&elm);
512 gras_trp_bufiov_flush(sock);
515 /* buffer big enough:
516 copy data into it, and chain it for upcoming writev */
517 memcpy(data->out_buf.data + data->out_buf.size, chunk, size);
518 elm.iov_base = (void*)(data->out_buf.data + data->out_buf.size);
519 data->out_buf.size += size;
521 xbt_dynar_push(data->out_buf_v,&elm);
525 /* data storage stable. Chain it */
527 elm.iov_base = (void*)chunk;
528 xbt_dynar_push(data->out_buf_v,&elm);
/* Vector receive: read nothing now, only register chunk and size as a
 * struct iovec in in_buf_v. The bytes are fetched by the readv() call of
 * gras_trp_bufiov_flush(), so chunk is not filled before that flush. */
532 gras_trp_iov_recv(gras_socket_t sock,
534 unsigned long int size) {
537 DEBUG0("Buffer one chunk to be received later");
538 elm.iov_base = (void*)chunk;
539 elm.iov_len = (size_t)size;
540 xbt_dynar_push(sock->bufdata->in_buf_v,&elm);
546 /***************************************/
547 /****[ end of VECTOR DATA EXCHANGE ]****/
548 /***************************************/
552 *** Prototypes of BUFFERED
/* Socket-management entry points of the buffered transport. They are
 * installed in the plugin by gras_trp_tcp_setup(). */
555 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
557 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
559 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock);
561 void gras_trp_buf_socket_close(gras_socket_t sd);
/* Allocate the buffering state of sock and attach it as sock->bufdata.
 *
 * Both flat buffers get buffsize bytes (100 * 1024) and start empty, with
 * pos equal to size. Both iovec vectors are created as dynars of struct
 * iovec. The input mode is buffering_buf.
 * NOTE(review): data->out is assigned twice in the visible lines (iov, then
 * buf); these look like alternative branches of a conditional compilation
 * whose directives are not visible. Confirm which one is active.
 * Presumably returns sock (the return statement is not visible). */
564 gras_socket_t gras_trp_buf_init_sock(gras_socket_t sock) {
565 gras_trp_bufdata_t *data=xbt_new(gras_trp_bufdata_t,1);
567 data->buffsize = 100 * 1024 ; /* 100k */
569 data->in_buf.size = 0;
570 data->in_buf.data = xbt_malloc(data->buffsize);
571 data->in_buf.pos = 0; /* useless, indeed, since size==pos */
573 data->out_buf.size = 0;
574 data->out_buf.data = xbt_malloc(data->buffsize);
575 data->out_buf.pos = data->out_buf.size;
578 data->in_buf_v = data->out_buf_v = NULL;
579 data->in_buf_v=xbt_dynar_new(sizeof(struct iovec),NULL);
580 data->out_buf_v=xbt_dynar_new(sizeof(struct iovec),NULL);
581 data->out = buffering_iov;
583 data->out = buffering_buf;
586 data->in = buffering_buf;
588 sock->bufdata = data;
/* Fill the callback table of the plugin plug with the functions of this
 * file: buffered socket management, buffered recv, raw send and recv, and
 * the flush routine.
 * NOTE(review): plug->send is assigned twice in the visible lines (iov
 * variant, then buf variant); these look like alternative branches of a
 * conditional compilation whose directives are not visible. Confirm. */
596 gras_trp_tcp_setup(gras_trp_plugin_t plug) {
598 plug->socket_client = gras_trp_buf_socket_client;
599 plug->socket_server = gras_trp_buf_socket_server;
600 plug->socket_accept = gras_trp_buf_socket_accept;
601 plug->socket_close = gras_trp_buf_socket_close;
604 plug->send = gras_trp_iov_send;
606 plug->send = gras_trp_buf_send;
608 plug->recv = gras_trp_buf_recv;
610 plug->raw_send = gras_trp_tcp_send;
611 plug->raw_recv = gras_trp_tcp_recv;
613 plug->flush = gras_trp_bufiov_flush;
/* Buffered client socket: open the TCP connection described by sock, then
 * attach the buffering state to it. The plugin argument self is not passed
 * on; the TCP layer receives NULL instead. */
619 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
620 /* OUT */ gras_socket_t sock){
622 gras_trp_sock_socket_client(NULL,sock);
623 gras_trp_buf_init_sock(sock);
627 * gras_trp_buf_socket_server:
629 * Open a socket used to receive messages.
/* Buffered server socket: open the listening TCP socket described by sock,
 * then attach the buffering state to it. The plugin argument self is not
 * passed on; the TCP layer receives NULL instead. */
631 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
632 /* OUT */ gras_socket_t sock){
634 gras_trp_sock_socket_server(NULL,sock);
635 gras_trp_buf_init_sock(sock);
/* Buffered accept: accept a connection on the listening socket sock at the
 * TCP level, attach the buffering state to the new socket, and return the
 * result of gras_trp_buf_init_sock() for it. */
638 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock) {
639 return gras_trp_buf_init_sock(gras_trp_sock_socket_accept(sock));
/* Close a buffered socket: warn about unread input, flush pending output,
 * release both flat buffers and both iovec vectors, then close the TCP
 * descriptor.
 *
 * NOTE(review): sock->bufdata is dereferenced with no NULL check, whereas
 * the TCP-level close accepts a NULL socket. Confirm callers never pass a
 * socket without buffering state.
 * NOTE(review): no visible line releases the bufdata structure itself.
 * Confirm against the full source. */
642 void gras_trp_buf_socket_close(gras_socket_t sock){
643 gras_trp_bufdata_t *data=sock->bufdata;
/* Input still buffered but never consumed is lost; only a warning is
 * emitted. */
645 if (data->in_buf.size!=data->in_buf.pos) {
646 WARN3("Socket closed, but %d bytes were unread (size=%d,pos=%d)",
647 data->in_buf.size - data->in_buf.pos,
648 data->in_buf.size, data->in_buf.pos);
650 if (data->in_buf.data)
651 free(data->in_buf.data);
/* The flush has to stay before the release of out_buf.data: chained chunks
 * may point into that buffer. */
653 if (data->out_buf.size!=data->out_buf.pos) {
654 DEBUG2("Flush the socket before closing (in=%d,out=%d)",
655 data->in_buf.size, data->out_buf.size);
656 gras_trp_bufiov_flush(sock);
658 if (data->out_buf.data)
659 free(data->out_buf.data);
/* Receive requests still registered at this point are dropped with a
 * warning. */
662 if (data->in_buf_v) {
663 if (xbt_dynar_length(data->in_buf_v))
664 WARN0("Socket closed, but some bytes were unread");
665 xbt_dynar_free(&data->in_buf_v);
667 if (data->out_buf_v) {
668 if (xbt_dynar_length(data->out_buf_v)) {
669 DEBUG0("Flush the socket before closing");
670 gras_trp_bufiov_flush(sock);
672 xbt_dynar_free(&data->out_buf_v);
677 gras_trp_sock_socket_close(sock);
680 /****************************/
681 /****[ HELPER FUNCTIONS ]****/
682 /****************************/
685 * Returns the TCP protocol number from the network protocol database.
687 * getprotobyname() is not thread safe. We need to lock it.
/* Look up the protocol number registered under the name tcp and cache it in
 * a function-local static, so getprotobyname() is only called while the
 * cached value is still 0. An assertion checks that the lookup succeeded.
 * No locking is visible around the lookup or around the cache. */
689 static int _gras_tcp_proto_number(void) {
690 struct protoent *fetchedEntry;
691 static int returnValue = 0;
693 if(returnValue == 0) {
694 fetchedEntry = getprotobyname("tcp");
695 xbt_assert0(fetchedEntry, "getprotobyname(tcp) gave NULL");
696 returnValue = fetchedEntry->p_proto;
/* Winsock only: this whole section is compiled when HAVE_WINSOCK_H is
 * defined. */
702 #ifdef HAVE_WINSOCK_H
/* RETSTR(x) expands to a case label for x that returns the spelling of x
 * as a string, through the stringification operator. */
703 #define RETSTR( x ) case x: return #x
/* Translate the Winsock error code err into the name of the matching WSA
 * constant. Codes that match none of the listed constants give a generic
 * fallback string. The returned strings are literals. */
705 const char *gras_wsa_err2string( int err ) {
713 RETSTR( WSAEWOULDBLOCK );
714 RETSTR( WSAEINPROGRESS );
715 RETSTR( WSAEALREADY );
716 RETSTR( WSAENOTSOCK );
717 RETSTR( WSAEDESTADDRREQ );
718 RETSTR( WSAEMSGSIZE );
719 RETSTR( WSAEPROTOTYPE );
720 RETSTR( WSAENOPROTOOPT );
721 RETSTR( WSAEPROTONOSUPPORT );
722 RETSTR( WSAESOCKTNOSUPPORT );
723 RETSTR( WSAEOPNOTSUPP );
724 RETSTR( WSAEPFNOSUPPORT );
725 RETSTR( WSAEAFNOSUPPORT );
726 RETSTR( WSAEADDRINUSE );
727 RETSTR( WSAEADDRNOTAVAIL );
728 RETSTR( WSAENETDOWN );
729 RETSTR( WSAENETUNREACH );
730 RETSTR( WSAENETRESET );
731 RETSTR( WSAECONNABORTED );
732 RETSTR( WSAECONNRESET );
733 RETSTR( WSAENOBUFS );
734 RETSTR( WSAEISCONN );
735 RETSTR( WSAENOTCONN );
736 RETSTR( WSAESHUTDOWN );
737 RETSTR( WSAETOOMANYREFS );
738 RETSTR( WSAETIMEDOUT );
739 RETSTR( WSAECONNREFUSED );
741 RETSTR( WSAENAMETOOLONG );
742 RETSTR( WSAEHOSTDOWN );
743 RETSTR( WSAEHOSTUNREACH );
744 RETSTR( WSAENOTEMPTY );
745 RETSTR( WSAEPROCLIM );
749 RETSTR( WSAEREMOTE );
750 RETSTR( WSASYSNOTREADY );
751 RETSTR( WSAVERNOTSUPPORTED );
752 RETSTR( WSANOTINITIALISED );
753 RETSTR( WSAEDISCON );
756 RETSTR( WSAENOMORE );
757 RETSTR( WSAECANCELLED );
758 RETSTR( WSAEINVALIDPROCTABLE );
759 RETSTR( WSAEINVALIDPROVIDER );
760 RETSTR( WSASYSCALLFAILURE );
761 RETSTR( WSASERVICE_NOT_FOUND );
762 RETSTR( WSATYPE_NOT_FOUND );
763 RETSTR( WSA_E_NO_MORE );
764 RETSTR( WSA_E_CANCELLED );
765 RETSTR( WSAEREFUSED );
766 #endif /* HAVE_WINSOCK2 */
768 RETSTR( WSAHOST_NOT_FOUND );
769 RETSTR( WSATRY_AGAIN );
770 RETSTR( WSANO_RECOVERY );
771 RETSTR( WSANO_DATA );
773 return "unknown WSA error";
775 #endif /* HAVE_WINSOCK_H */
777 /***********************************/
778 /****[ end of HELPER FUNCTIONS ]****/
779 /***********************************/