1 /* buf trp (transport) - buffered transport using the TCP one */
3 /* Copyright (c) 2004, 2005, 2006, 2007, 2009, 2010. The SimGrid Team.
4 * All rights reserved. */
6 /* This program is free software; you can redistribute it and/or modify it
7 * under the terms of the license (GNU LGPL) which comes with this package. */
10 #include <string.h> /* memset */
14 #include "xbt/sysdep.h"
16 #include "gras/Transport/transport_private.h"
17 #include "gras/Msg/msg_interface.h" /* listener_close_socket */
19 /* FIXME: maybe READV is sometimes a good thing? */
/* Smaller of a and b. As a macro it may evaluate each argument twice, so
   arguments must be free of side effects. */
27 #define MIN(a,b) ((a)<(b)?(a):(b))
/* Logging category gras_trp_tcp, declared as a subcategory of gras_trp. */
30 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(gras_trp_tcp, gras_trp,
31 "TCP buffered transport");
34 *** Specific socket part
/* Staging mode of one socket direction: a single contiguous buffer
   (buffering_buf) or a vector of chunks handed to writev/readv
   (buffering_iov). */
37 typedef enum { buffering_buf, buffering_iov } buffering_kind;
42 int pos; /* for receive; not exchanged over the net */
/* Per-socket buffering state, reached through sock->bufdata.
   NOTE(review): only some members appear in this listing; the functions below
   also use buffsize, in, out and in_buf_v - confirm against the full source. */
46 struct gras_trp_bufdata_ {
48 gras_trp_buf_t in_buf; /* receive buffer (data, size, read position) */
49 gras_trp_buf_t out_buf; /* send buffer */
53 xbt_dynar_t out_buf_v; /* chunks queued for writev() at flush time */
61 /*****************************/
62 /****[ SOCKET MANAGEMENT ]****/
63 /*****************************/
64 /* The client sends its port number as soon as its socket is created,
65 so the raw send/receive functions must be usable (declared) right away. */
/* Forward declarations of the raw (unbuffered) send and receive functions,
   which the socket-management code calls before they are defined. */
66 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock,
68 unsigned long int size);
69 static int gras_trp_tcp_recv(gras_socket_t sock, char *data,
70 unsigned long int size);
/* Cached lookup of the tcp protocol number; defined with the helper functions. */
73 static int _gras_tcp_proto_number(void);
/* Open the client side of a TCP connection.
 *
 * Creates sock->sd, connects it to sock->peer_name:sock->peer_port and then
 * sends our own port number to the peer with a raw, unbuffered write.
 * Raises system_error when the socket cannot be created, the peer name cannot
 * be resolved or connect() fails. A failure to set the socket buffer sizes is
 * only logged at verbose level.
 * NOTE(review): several lines of this function are absent from this listing;
 * the remarks below cover only what is visible.
 */
75 static XBT_INLINE void gras_trp_sock_socket_client(gras_trp_plugin_t
80 struct sockaddr_in addr;
82 struct in_addr *haddr;
83 int size = sock->buf_size;
/* our own port, converted to network byte order before it is sent below */
84 uint32_t myport = htonl(((gras_trp_procdata_t)
86 (gras_trp_libdata_id))->myport);
88 sock->incoming = 1; /* TCP sockets are duplex'ed */
90 sock->sd = socket(AF_INET, SOCK_STREAM, 0);
93 THROW1(system_error, 0, "Failed to create socket: %s",
94 sock_errstr(sock_errno));
/* apply sock->buf_size to both the receive and the send buffer; not fatal */
98 (sock->sd, SOL_SOCKET, SO_RCVBUF, (char *) &size, sizeof(size))
99 || setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF, (char *) &size,
101 VERB1("setsockopt failed, cannot set buffer size: %s",
102 sock_errstr(sock_errno));
/* NOTE(review): gethostbyname() normally reports failures through h_errno,
   while the message below prints sock_errno - confirm this is intended. */
105 he = gethostbyname(sock->peer_name);
107 THROW2(system_error, 0, "Failed to lookup hostname %s: %s",
108 sock->peer_name, sock_errstr(sock_errno));
/* only the first address returned by the lookup is used */
111 haddr = ((struct in_addr *) (he->h_addr_list)[0]);
113 memset(&addr, 0, sizeof(struct sockaddr_in));
114 memcpy(&addr.sin_addr, haddr, sizeof(struct in_addr));
115 addr.sin_family = AF_INET;
116 addr.sin_port = htons(sock->peer_port);
118 if (connect(sock->sd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
120 THROW3(system_error, 0,
121 "Failed to connect socket to %s:%d (%s)",
122 sock->peer_name, sock->peer_port, sock_errstr(sock_errno));
/* announce our port to the peer; the accepting side reads it right after
   accept() */
125 gras_trp_tcp_send(sock, (char *) &myport, sizeof(uint32_t));
126 DEBUG1("peerport sent to %d", sock->peer_port);
128 VERB4("Connect to %s:%d (sd=%d, port %d here)",
129 sock->peer_name, sock->peer_port, sock->sd, sock->port);
133 * gras_trp_sock_socket_server:
135 * Open a socket used to receive messages.
/* Open the server side: create a TCP socket bound to sock->port on any local
 * address and put it in listening state.
 * Raises system_error when socket creation, the SO_REUSEADDR option, bind()
 * or listen() fails. A failure to set the buffer sizes is only logged.
 * NOTE(review): in the visible lines only sin_port, sin_addr and sin_family
 * of server are assigned and no memset is shown (the client code zeroes its
 * sockaddr_in first) - confirm against the full source.
 */
137 static XBT_INLINE void gras_trp_sock_socket_server(gras_trp_plugin_t
141 int size = sock->buf_size;
143 struct sockaddr_in server;
145 sock->outgoing = 1; /* TCP => duplex mode */
147 server.sin_port = htons((u_short) sock->port);
148 server.sin_addr.s_addr = INADDR_ANY;
149 server.sin_family = AF_INET;
150 if ((sock->sd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
151 THROW1(system_error, 0, "Socket allocation failed: %s",
152 sock_errstr(sock_errno));
/* SO_REUSEADDR is mandatory here: failing to set it aborts the setup */
155 (sock->sd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)))
156 THROW1(system_error, 0,
157 "setsockopt failed, cannot condition the socket: %s",
158 sock_errstr(sock_errno));
/* buffer sizes are best effort: a failure is logged and setup continues */
160 if (setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF,
161 (char *) &size, sizeof(size))
162 || setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF,
163 (char *) &size, sizeof(size))) {
164 VERB1("setsockopt failed, cannot set buffer size: %s",
165 sock_errstr(sock_errno));
168 if (bind(sock->sd, (struct sockaddr *) &server, sizeof(server)) == -1) {
170 THROW2(system_error, 0,
171 "Cannot bind to port %d: %s", sock->port,
172 sock_errstr(sock_errno));
175 DEBUG2("Listen on port %d (sd=%d)", sock->port, sock->sd);
/* backlog of 5 pending connections */
176 if (listen(sock->sd, 5) < 0) {
178 THROW2(system_error, 0,
179 "Cannot listen on port %d: %s",
180 sock->port, sock_errstr(sock_errno));
183 VERB2("Openned a server socket on port %d (sd=%d)", sock->port,
/* Accept one pending connection on the server socket sock.
 *
 * Allocates a new socket res, conditions the accepted descriptor
 * (SO_KEEPALIVE, TCP_NODELAY, buffer sizes), copies buf_size, plugin,
 * incoming and outgoing from sock, reads the port number sent by the peer,
 * records the peer address as text and registers res in the socket list of
 * the process. The caller (gras_trp_buf_socket_accept) uses the result as the
 * new socket; the return statement itself is not part of this listing.
 * Raises system_error when accept() fails (the server socket is closed first)
 * or when the keepalive/nodelay options cannot be set.
 * NOTE(review): the two setsockopt error paths print tmp_errno, which in the
 * visible lines is only assigned right after accept(); unless it is refreshed
 * on a line not shown, the reported error text may be stale.
 */
187 static gras_socket_t gras_trp_sock_socket_accept(gras_socket_t sock)
191 struct sockaddr_in peer_in;
192 socklen_t peer_in_len = sizeof(peer_in);
199 socklen_t s = sizeof(int);
204 gras_trp_socket_new(1, &res);
206 sd = accept(sock->sd, (struct sockaddr *) &peer_in, &peer_in_len);
/* keep the error code of accept() before any other call is made */
207 tmp_errno = sock_errno;
210 gras_socket_close(sock);
211 THROW1(system_error, 0,
212 "Accept failed (%s). Droping server socket.",
213 sock_errstr(tmp_errno));
216 if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, (char *) &i, s)
217 || setsockopt(sd, _gras_tcp_proto_number(), TCP_NODELAY, (char *) &i,
219 THROW1(system_error, 0,
220 "setsockopt failed, cannot condition the socket: %s",
221 sock_errstr(tmp_errno));
/* the accepted socket inherits the buffer size of the server socket */
223 res->buf_size = sock->buf_size;
224 size = sock->buf_size;
225 if (setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &size, sizeof(size))
226 || setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &size,
228 VERB1("setsockopt failed, cannot set buffer size: %s",
229 sock_errstr(tmp_errno));
231 res->plugin = sock->plugin;
232 res->incoming = sock->incoming;
233 res->outgoing = sock->outgoing;
/* the client sends its port right after connecting; it arrives in network
   byte order. NOTE(review): the result of this receive is not checked. */
238 gras_trp_tcp_recv(res, (char *) &hisport, sizeof(hisport));
239 res->peer_port = ntohl(hisport);
240 DEBUG1("peerport %d received", res->peer_port);
242 /* FIXME: Lock to protect inet_ntoa */
/* peers that are not AF_INET get a placeholder name */
243 if (((struct sockaddr *) &peer_in)->sa_family != AF_INET) {
244 res->peer_name = (char *) strdup("unknown");
246 struct in_addr addrAsInAddr;
249 addrAsInAddr.s_addr = peer_in.sin_addr.s_addr;
251 tmp = inet_ntoa(addrAsInAddr);
/* inet_ntoa result is duplicated so that res owns its own copy */
253 res->peer_name = (char *) strdup(tmp);
255 res->peer_name = (char *) strdup("unknown");
259 VERB3("Accepted from %s:%d (sd=%d)", res->peer_name, res->peer_port, sd);
/* register the new socket in the per-process list */
260 xbt_dynar_push(((gras_trp_procdata_t)
261 gras_libdata_by_id(gras_trp_libdata_id))->sockets, &res);
/* Close the TCP connection of sock. The descriptor is not closed here: the
 * request is handed to the message listener.
 * NOTE(review): the condition guarding the early return is not part of this
 * listing.
 */
267 static void gras_trp_sock_socket_close(gras_socket_t sock)
271 return; /* close only once */
273 VERB1("close tcp connection %d", sock->sd);
275 /* ask the listener to close the socket */
276 gras_msg_listener_close_socket(sock->sd);
279 /************************************/
280 /****[ end of SOCKET MANAGEMENT ]****/
281 /************************************/
284 /************************************/
285 /****[ UNBUFFERED DATA EXCHANGE ]****/
286 /************************************/
287 /* Temptation to merge this with file data exchange is great,
288 but doesn't work on BillWare (see tcp_write() in portable.h) */
/* Raw (unbuffered) send: write size bytes from data to sock->sd with
 * tcp_write().
 * Raises system_error with the write failure message, apparently when the
 * error is not one of the transient errno values tested below, and with the
 * message about a closed file descriptor in a case whose condition is not
 * visible here.
 * NOTE(review): the loop and branch structure of this function is missing
 * from this listing; confirm the details against the full source.
 */
289 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock,
291 unsigned long int size)
297 status = tcp_write(sock->sd, data, (size_t) size);
298 DEBUG3("write(%d, %p, %ld);", sock->sd, data, size);
/* Two variants of the transient-error test; presumably only one of them is
   compiled, depending on whether EWOULDBLOCK exists - TODO confirm. */
302 if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
304 if (errno == EINTR || errno == EAGAIN)
308 THROW4(system_error, 0, "write(%d,%p,%ld) failed: %s",
309 sock->sd, data, size, sock_errstr(sock_errno));
316 THROW1(system_error, 0, "file descriptor closed (%s)",
317 sock_errstr(sock_errno));
/* Raw receive into data, asking tcp_read() for at most bufsize bytes per
 * call; got is the number of bytes already stored in data.
 * Raises system_error when tcp_read() fails and when the remote side closed
 * the socket.
 * NOTE(review): the loop condition, the use of size versus bufsize and the
 * returned value are on lines missing from this listing.
 */
322 static XBT_INLINE int
323 gras_trp_tcp_recv_withbuffer(gras_socket_t sock,
325 unsigned long int size,
326 unsigned long int bufsize)
/* a byte stored earlier in sock->recvd_val becomes the first byte of data;
   presumably guarded by a flag on a line not shown - TODO confirm */
332 data[0] = sock->recvd_val;
341 DEBUG5("read(%d, %p, %ld) got %d so far (%s)",
342 sock->sd, data + got, bufsize, got,
343 hexa_str((unsigned char *) data, got, 0));
/* new bytes are appended after the got bytes already received */
344 status = tcp_read(sock->sd, data + got, (size_t) bufsize);
347 THROW7(system_error, 0,
348 "read(%d,%p,%d) from %s:%d failed: %s; got %d so far",
349 sock->sd, data + got, (int) size, gras_socket_peer_name(sock),
350 gras_socket_peer_port(sock), sock_errstr(sock_errno), got);
352 DEBUG2("Got %d more bytes (%s)", status,
353 hexa_str((unsigned char *) data + got, status, 0));
359 THROW1(system_error, errno,
360 "Socket closed by remote side (got %d bytes before this)",
/* Raw receive of size bytes into data: same as gras_trp_tcp_recv_withbuffer
   with bufsize equal to size. Returns whatever that function returns. */
368 static int gras_trp_tcp_recv(gras_socket_t sock,
369 char *data, unsigned long int size)
371 return gras_trp_tcp_recv_withbuffer(sock, data, size, size);
375 /*******************************************/
376 /****[ end of UNBUFFERED DATA EXCHANGE ]****/
377 /*******************************************/
379 /**********************************/
380 /****[ BUFFERED DATA EXCHANGE ]****/
381 /**********************************/
383 /* Make sure the data is sent */
/* Push out everything staged on the socket.
 *  - out mode buffering_buf: send out_buf with one raw gras_trp_tcp_send and
 *    mark the buffer empty;
 *  - out mode buffering_iov: hand the queued chunks to writev(), clear the
 *    vector and mark out_buf empty;
 *  - in mode buffering_iov: fill the registered destinations with readv() and
 *    clear the vector.
 * NOTE(review): the results of writev() and readv() are discarded, so a short
 * or failed transfer is not detected here.
 */
384 static void gras_trp_bufiov_flush(gras_socket_t sock)
390 gras_trp_bufdata_t *data = sock->bufdata;
394 if (data->out == buffering_buf) {
/* dump the pending bytes only when debug logging is enabled */
395 if (XBT_LOG_ISENABLED(gras_trp_tcp, xbt_log_priority_debug))
396 hexa_print("chunk to send ",
397 (unsigned char *) data->out_buf.data, data->out_buf.size);
/* nothing is sent when size equals pos */
398 if ((data->out_buf.size - data->out_buf.pos) != 0) {
399 DEBUG3("Send the chunk (size=%d) to %s:%d", data->out_buf.size,
400 gras_socket_peer_name(sock), gras_socket_peer_port(sock));
401 gras_trp_tcp_send(sock, data->out_buf.data, data->out_buf.size);
402 VERB1("Chunk sent (size=%d)", data->out_buf.size);
403 data->out_buf.size = 0;
407 if (data->out == buffering_iov) {
408 DEBUG0("Flush out iov");
409 vect = sock->bufdata->out_buf_v;
410 if ((size = xbt_dynar_length(vect))) {
411 DEBUG1("Flush %d chunks out of this socket", size);
/* the dynar storage is passed directly as the iovec array */
412 writev(sock->sd, xbt_dynar_get_ptr(vect, 0), size);
413 xbt_dynar_reset(vect);
415 data->out_buf.size = 0; /* reset the buffer containing non-stable data */
418 if (data->in == buffering_iov) {
419 DEBUG0("Flush in iov");
420 vect = sock->bufdata->in_buf_v;
421 if ((size = xbt_dynar_length(vect))) {
422 DEBUG1("Get %d chunks from of this socket", size);
423 readv(sock->sd, xbt_dynar_get_ptr(vect, 0), size);
424 xbt_dynar_reset(vect);
/* Buffered send: copy size bytes of chunk into out_buf, piece by piece, and
 * flush the socket each time the buffer becomes full. Data that does not fill
 * the buffer stays there until a later flush. The last parameter is named
 * stable_ignored and is not used in the visible lines.
 */
431 gras_trp_buf_send(gras_socket_t sock,
433 unsigned long int size, int stable_ignored)
436 gras_trp_bufdata_t *data = (gras_trp_bufdata_t *) sock->bufdata;
441 while (chunk_pos < size) {
442 /* size of the piece copied into the buffer in this pass */
/* bounded by what is left to send and by the free room in out_buf */
444 min(size - chunk_pos, data->buffsize - data->out_buf.size);
445 DEBUG4("Set the chars %d..%ld into the buffer; size=%ld, ctn=(%s)",
446 (int) data->out_buf.size,
447 ((int) data->out_buf.size) + thissize - 1, size,
448 hexa_str((unsigned char *) chunk, thissize, 0));
/* append after the bytes already staged */
450 memcpy(data->out_buf.data + data->out_buf.size, chunk + chunk_pos,
453 data->out_buf.size += thissize;
454 chunk_pos += thissize;
455 DEBUG4("New pos = %d; Still to send = %ld of %ld; ctn sofar=(%s)",
456 data->out_buf.size, size - chunk_pos, size,
457 hexa_str((unsigned char *) chunk, chunk_pos, 0));
459 if (data->out_buf.size == data->buffsize) /* out of space. Flush it */
460 gras_trp_bufiov_flush(sock);
/* Buffered receive: deliver size bytes into chunk, serving them from in_buf
 * and refilling in_buf from the socket whenever it is exhausted (size equal
 * to pos). On exit sock->moredata tells whether unread bytes remain in
 * in_buf.
 * NOTE(review): chunk_pos is a signed long compared with the unsigned size;
 * the comparison is therefore carried out on unsigned values.
 */
467 gras_trp_buf_recv(gras_socket_t sock, char *chunk, unsigned long int size)
470 gras_trp_bufdata_t *data = sock->bufdata;
471 long int chunk_pos = 0;
475 while (chunk_pos < size) {
476 /* size of the chunk to receive in that shot */
479 if (data->in_buf.size == data->in_buf.pos) { /* out of data. Get more */
481 DEBUG2("Get more data (size=%d,bufsize=%d)",
482 (int) MIN(size - chunk_pos, data->buffsize),
483 (int) data->buffsize);
/* refill the start of in_buf; the destination of the result is on a line
   that is missing from this listing */
487 gras_trp_tcp_recv_withbuffer(sock, data->in_buf.data,
488 MIN(size - chunk_pos,
492 data->in_buf.pos = 0;
/* hand over as much as is both wanted and available */
495 thissize = min(size - chunk_pos, data->in_buf.size - data->in_buf.pos);
496 memcpy(chunk + chunk_pos, data->in_buf.data + data->in_buf.pos,
499 data->in_buf.pos += thissize;
500 chunk_pos += thissize;
501 DEBUG4("New pos = %d; Still to receive = %ld of %ld. Ctn so far=(%s)",
502 data->in_buf.pos, size - chunk_pos, size,
503 hexa_str((unsigned char *) chunk, chunk_pos, 0));
505 /* tell gras_select that buffered data is still pending on this socket, so that it does not actually call select */
506 sock->moredata = (data->in_buf.size > data->in_buf.pos);
507 DEBUG1("There is %smore data", (sock->moredata ? "" : "no "));
513 /*****************************************/
514 /****[ end of BUFFERED DATA EXCHANGE ]****/
515 /*****************************************/
517 /********************************/
518 /****[ VECTOR DATA EXCHANGE ]****/
519 /********************************/
/* Vector send: queue chunk in out_buf_v for a later writev().
 * Following the comments below, three cases exist:
 *  - storage not stable and too large for the room left in out_buf: the
 *    chunk is queued by address and the socket is flushed at once;
 *  - storage not stable but small enough: the bytes are copied into out_buf
 *    and the copy is queued;
 *  - storage stable: the chunk is queued by address.
 * NOTE(review): the tests on the stable parameter are on lines missing from
 * this listing.
 */
522 gras_trp_iov_send(gras_socket_t sock,
523 const char *chunk, unsigned long int size, int stable)
526 gras_trp_bufdata_t *data = (gras_trp_bufdata_t *) sock->bufdata;
529 DEBUG1("Buffer one chunk to be sent later (%s)",
530 hexa_str((char *) chunk, size, 0));
532 elm.iov_len = (size_t) size;
535 /* data storage won't last until flush. Save it in a buffer if we can */
537 if (size > data->buffsize - data->out_buf.size) {
539 flush the socket, using data in its actual storage */
540 elm.iov_base = (void *) chunk;
541 xbt_dynar_push(data->out_buf_v, &elm);
543 gras_trp_bufiov_flush(sock);
546 /* buffer big enough:
547 copy data into it, and chain it for upcoming writev */
548 memcpy(data->out_buf.data + data->out_buf.size, chunk, size);
549 elm.iov_base = (void *) (data->out_buf.data + data->out_buf.size);
550 data->out_buf.size += size;
552 xbt_dynar_push(data->out_buf_v, &elm);
556 /* data storage stable. Chain it */
558 elm.iov_base = (void *) chunk;
559 xbt_dynar_push(data->out_buf_v, &elm);
/* Vector receive: nothing is read here. The destination chunk and its size
   are appended to in_buf_v, which gras_trp_bufiov_flush hands to readv() when
   the input mode is buffering_iov. */
564 gras_trp_iov_recv(gras_socket_t sock, char *chunk, unsigned long int size)
568 DEBUG0("Buffer one chunk to be received later");
569 elm.iov_base = (void *) chunk;
570 elm.iov_len = (size_t) size;
571 xbt_dynar_push(sock->bufdata->in_buf_v, &elm);
577 /***************************************/
578 /****[ end of VECTOR DATA EXCHANGE ]****/
579 /***************************************/
583 *** Prototypes of BUFFERED
/* Socket entry points of the buffered transport; gras_trp_tcp_setup installs
   them in the plugin and they are defined further down in this file. */
586 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
588 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
590 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock);
592 void gras_trp_buf_socket_close(gras_socket_t sd);
/* Allocate and attach the buffering state of sock: two buffers of buffsize
 * bytes (input and output, both empty), the two chunk vectors and the
 * buffering modes.
 * NOTE(review): the return statement is not part of this listing; callers use
 * the result as the socket, so it presumably returns sock.
 */
595 gras_socket_t gras_trp_buf_init_sock(gras_socket_t sock)
597 gras_trp_bufdata_t *data = xbt_new(gras_trp_bufdata_t, 1);
599 data->buffsize = 100 * 1024; /* 100k */
601 data->in_buf.size = 0;
602 data->in_buf.data = xbt_malloc(data->buffsize);
603 data->in_buf.pos = 0; /* useless, indeed, since size==pos */
605 data->out_buf.size = 0;
606 data->out_buf.data = xbt_malloc(data->buffsize);
607 data->out_buf.pos = data->out_buf.size;
610 data->in_buf_v = data->out_buf_v = NULL;
/* vectors of struct iovec, created without a per-element free function */
611 data->in_buf_v = xbt_dynar_new(sizeof(struct iovec), NULL);
612 data->out_buf_v = xbt_dynar_new(sizeof(struct iovec), NULL);
/* Two assignments of the output mode follow; presumably they are the two
   arms of a preprocessor conditional that is not shown - TODO confirm. */
613 data->out = buffering_iov;
615 data->out = buffering_buf;
618 data->in = buffering_buf;
620 sock->bufdata = data;
/* Fill the plugin descriptor plug with the functions of this transport:
 * socket management, buffered send/receive, raw send/receive and flush.
 */
627 void gras_trp_tcp_setup(gras_trp_plugin_t plug)
630 plug->socket_client = gras_trp_buf_socket_client;
631 plug->socket_server = gras_trp_buf_socket_server;
632 plug->socket_accept = gras_trp_buf_socket_accept;
633 plug->socket_close = gras_trp_buf_socket_close;
/* Two assignments of the send function follow; presumably they are the two
   arms of a preprocessor conditional that is not shown - TODO confirm. */
636 plug->send = gras_trp_iov_send;
638 plug->send = gras_trp_buf_send;
640 plug->recv = gras_trp_buf_recv;
/* raw variants bypass the buffers */
642 plug->raw_send = gras_trp_tcp_send;
643 plug->raw_recv = gras_trp_tcp_recv;
645 plug->flush = gras_trp_bufiov_flush;
/* Client entry point of the plugin: open the TCP connection, then attach the
   buffering state to sock. The self argument is not forwarded (NULL is passed
   to the TCP layer). */
651 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
652 /* OUT */ gras_socket_t sock)
655 gras_trp_sock_socket_client(NULL, sock);
656 gras_trp_buf_init_sock(sock);
660 * gras_trp_buf_socket_server:
662 * Open a socket used to receive messages.
/* Server entry point of the plugin: open the listening TCP socket, then
   attach the buffering state to sock. The self argument is not forwarded
   (NULL is passed to the TCP layer). */
664 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
665 /* OUT */ gras_socket_t sock)
668 gras_trp_sock_socket_server(NULL, sock);
669 gras_trp_buf_init_sock(sock);
/* Accept entry point of the plugin: accept a connection on the server socket
   sock and return the new socket with its buffering state attached. */
672 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock)
674 return gras_trp_buf_init_sock(gras_trp_sock_socket_accept(sock));
/* Close entry point of the plugin: release the buffering state of sock, then
 * close the TCP connection.
 * Unread input is only reported with a warning; pending output is flushed
 * before its storage is released.
 * NOTE(review): the release of the bufdata structure itself is not visible in
 * this listing.
 */
677 void gras_trp_buf_socket_close(gras_socket_t sock)
679 gras_trp_bufdata_t *data = sock->bufdata;
/* input buffer: warn about bytes that were received but never consumed */
681 if (data->in_buf.size != data->in_buf.pos) {
682 WARN3("Socket closed, but %d bytes were unread (size=%d,pos=%d)",
683 data->in_buf.size - data->in_buf.pos,
684 data->in_buf.size, data->in_buf.pos);
686 if (data->in_buf.data)
687 free(data->in_buf.data);
/* output buffer: send what is still staged before freeing it */
689 if (data->out_buf.size != data->out_buf.pos) {
690 DEBUG2("Flush the socket before closing (in=%d,out=%d)",
691 data->in_buf.size, data->out_buf.size);
692 gras_trp_bufiov_flush(sock);
694 if (data->out_buf.data)
695 free(data->out_buf.data);
/* input vector: destinations registered but never filled are dropped */
698 if (data->in_buf_v) {
699 if (xbt_dynar_length(data->in_buf_v))
700 WARN0("Socket closed, but some bytes were unread");
701 xbt_dynar_free(&data->in_buf_v);
/* output vector: chunks still queued are flushed first */
703 if (data->out_buf_v) {
704 if (xbt_dynar_length(data->out_buf_v)) {
705 DEBUG0("Flush the socket before closing");
706 gras_trp_bufiov_flush(sock);
708 xbt_dynar_free(&data->out_buf_v);
713 gras_trp_sock_socket_close(sock);
716 /****************************/
717 /****[ HELPER FUNCTIONS ]****/
718 /****************************/
721 * Returns the tcp protocol number from the network protocol data base.
723 * getprotobyname() is not thread safe. We need to lock it.
/* Protocol number of tcp. The value is looked up with getprotobyname() on the
 * first call and kept in a function-level static, so later calls skip the
 * lookup. The lookup result is asserted to be non NULL.
 * NOTE(review): no lock is visible around the lookup or the cached value;
 * the return statement is not part of this listing.
 */
725 static int _gras_tcp_proto_number(void)
727 struct protoent *fetchedEntry;
/* 0 means that the lookup has not been done yet */
728 static int returnValue = 0;
730 if (returnValue == 0) {
731 fetchedEntry = getprotobyname("tcp");
732 xbt_assert0(fetchedEntry, "getprotobyname(tcp) gave NULL");
733 returnValue = fetchedEntry->p_proto;
/* WinSock only: translate a WSA error code into the name of its constant. */
739 #ifdef HAVE_WINSOCK_H
/* RETSTR(x) expands to a case label that returns the name of x as a string */
740 #define RETSTR( x ) case x: return #x
/* Returns the symbolic name of err, or a fixed fallback text when err is not
   one of the listed codes. The returned strings are literals.
   NOTE(review): some entries and the enclosing switch are on lines missing
   from this listing. */
742 const char *gras_wsa_err2string(int err)
751 RETSTR(WSAEWOULDBLOCK);
752 RETSTR(WSAEINPROGRESS);
755 RETSTR(WSAEDESTADDRREQ);
757 RETSTR(WSAEPROTOTYPE);
758 RETSTR(WSAENOPROTOOPT);
759 RETSTR(WSAEPROTONOSUPPORT);
760 RETSTR(WSAESOCKTNOSUPPORT);
761 RETSTR(WSAEOPNOTSUPP);
762 RETSTR(WSAEPFNOSUPPORT);
763 RETSTR(WSAEAFNOSUPPORT);
764 RETSTR(WSAEADDRINUSE);
765 RETSTR(WSAEADDRNOTAVAIL);
767 RETSTR(WSAENETUNREACH);
768 RETSTR(WSAENETRESET);
769 RETSTR(WSAECONNABORTED);
770 RETSTR(WSAECONNRESET);
774 RETSTR(WSAESHUTDOWN);
775 RETSTR(WSAETOOMANYREFS);
776 RETSTR(WSAETIMEDOUT);
777 RETSTR(WSAECONNREFUSED);
779 RETSTR(WSAENAMETOOLONG);
780 RETSTR(WSAEHOSTDOWN);
781 RETSTR(WSAEHOSTUNREACH);
782 RETSTR(WSAENOTEMPTY);
788 RETSTR(WSASYSNOTREADY);
789 RETSTR(WSAVERNOTSUPPORTED);
790 RETSTR(WSANOTINITIALISED);
795 RETSTR(WSAECANCELLED);
796 RETSTR(WSAEINVALIDPROCTABLE);
797 RETSTR(WSAEINVALIDPROVIDER);
798 RETSTR(WSASYSCALLFAILURE);
799 RETSTR(WSASERVICE_NOT_FOUND);
800 RETSTR(WSATYPE_NOT_FOUND);
801 RETSTR(WSA_E_NO_MORE);
802 RETSTR(WSA_E_CANCELLED);
/* end of a conditional section whose opening line is not shown here */
804 #endif /* HAVE_WINSOCK2 */
806 RETSTR(WSAHOST_NOT_FOUND);
807 RETSTR(WSATRY_AGAIN);
808 RETSTR(WSANO_RECOVERY);
/* fallback for codes without an entry above */
811 return "unknown WSA error";
813 #endif /* HAVE_WINSOCK_H */
815 /***********************************/
816 /****[ end of HELPER FUNCTIONS ]****/
817 /***********************************/