3 /* buf trp (transport) - buffered transport using the TCP one */
5 /* Copyright (c) 2004 Martin Quinson. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <string.h> /* memset */
15 #include "xbt/sysdep.h"
17 #include "gras/Transport/transport_private.h"
18 #include "gras/Msg/msg_interface.h" /* listener_close_socket */
20 /* FIXME: maybe READV is sometimes a good thing? */
/* Smaller of two values. The argument that wins the comparison is expanded
 * a second time, so do not pass expressions with side effects. */
28 #define MIN(a,b) ((a)<(b)?(a):(b))
/* Declares the log category gras_trp_tcp; the flush function of this file
 * tests it through XBT_LOG_ISENABLED.
 * NOTE(review): presumably this makes gras_trp_tcp a child of gras_trp and the
 * default category of the logging macros used here - confirm against the xbt
 * log API. */
31 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(gras_trp_tcp, gras_trp,
32 "TCP buffered transport");
35 *** Specific socket part
/* Buffering strategy of one direction of a socket: with buffering_buf the
 * data goes through a plain memory buffer, with buffering_iov the chunks are
 * chained in a dynar of struct iovec and handed to writev() or readv() when
 * the socket is flushed. */
38 typedef enum { buffering_buf, buffering_iov } buffering_kind;
43 int pos; /* for receive; not exchanged over the net */
/* Per-socket buffering state, attached to a socket as sock->bufdata.
 * Only part of the member list is visible in this listing; the functions of
 * this file also use the members buffsize, in, out and in_buf_v. */
47 struct gras_trp_bufdata_ {
/* received bytes not yet handed to the caller (consumed from pos up to size) */
49 gras_trp_buf_t in_buf;
/* bytes staged for sending until the next flush */
50 gras_trp_buf_t out_buf;
/* struct iovec entries queued for the next writev() */
54 xbt_dynar_t out_buf_v;
62 /*****************************/
63 /****[ SOCKET MANAGEMENT ]****/
64 /*****************************/
65 /* we exchange port number on client side on socket creation,
66 so we need to be able to talk right now. */
/* Forward declarations: the raw send and receive helpers and the TCP protocol
 * number lookup are defined further down, but the socket management functions
 * below already call them. */
67 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock, const char *data,
68 unsigned long int size);
69 static int gras_trp_tcp_recv(gras_socket_t sock, char *data,
70 unsigned long int size);
73 static int _gras_tcp_proto_number(void);
/* Client side of the socket creation.
 *
 * Visible steps: create a TCP socket, try to set the kernel receive and send
 * buffer sizes to sock->buf_size, resolve sock->peer_name, connect to
 * sock->peer_port, then send the local port (myport of the process data) to
 * the peer as a 32 bit integer in network byte order.
 *
 * The THROW calls report failures of socket creation, host lookup and connect
 * as system_error; a failure to set the buffer sizes is only logged.
 *
 * NOTE(review): several lines of this function (among them the tests guarding
 * the first two THROW calls and the rest of the parameter list) are missing
 * from this listing; the description covers the visible statements only. */
75 static XBT_INLINE void gras_trp_sock_socket_client(gras_trp_plugin_t ignored,
79 struct sockaddr_in addr;
81 struct in_addr *haddr;
82 int size = sock->buf_size;
/* local port, converted to network byte order for the handshake sent below */
83 uint32_t myport = htonl(((gras_trp_procdata_t)
84 gras_libdata_by_id(gras_trp_libdata_id))->myport);
86 sock->incoming = 1; /* TCP sockets are duplex'ed */
88 sock->sd = socket(AF_INET, SOCK_STREAM, 0);
91 THROW1(system_error, 0, "Failed to create socket: %s",
92 sock_errstr(sock_errno));
96 (sock->sd, SOL_SOCKET, SO_RCVBUF, (char *) &size, sizeof(size))
97 || setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF, (char *) &size,
/* best effort: not being able to set the buffer sizes is only logged */
99 VERB1("setsockopt failed, cannot set buffer size: %s",
100 sock_errstr(sock_errno));
/* NOTE(review): gethostbyname() reports its failures through h_errno, so the
 * sock_errno text used in the message below may not describe the lookup
 * error - confirm how sock_errno is defined on the supported platforms. */
103 he = gethostbyname(sock->peer_name);
105 THROW2(system_error, 0, "Failed to lookup hostname %s: %s",
106 sock->peer_name, sock_errstr(sock_errno));
/* only the first address returned by the lookup is used */
109 haddr = ((struct in_addr *) (he->h_addr_list)[0]);
111 memset(&addr, 0, sizeof(struct sockaddr_in));
112 memcpy(&addr.sin_addr, haddr, sizeof(struct in_addr));
113 addr.sin_family = AF_INET;
114 addr.sin_port = htons(sock->peer_port);
116 if (connect(sock->sd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
118 THROW3(system_error, 0,
119 "Failed to connect socket to %s:%d (%s)",
120 sock->peer_name, sock->peer_port, sock_errstr(sock_errno));
/* handshake: tell the peer which port this process uses; the accept side
 * reads these four bytes back and converts them with ntohl() */
123 gras_trp_tcp_send(sock, (char *) &myport, sizeof(uint32_t));
124 DEBUG1("peerport sent to %d", sock->peer_port);
126 VERB4("Connect to %s:%d (sd=%d, port %d here)",
127 sock->peer_name, sock->peer_port, sock->sd, sock->port);
131 * gras_trp_sock_socket_server:
133 * Open a socket used to receive messages.
/* Server side of the socket creation: opens a listening TCP socket on
 * sock->port, bound to INADDR_ANY.
 *
 * Visible steps: create the socket, set SO_REUSEADDR (failure throws), try to
 * set the kernel buffer sizes to sock->buf_size (failure is only logged),
 * bind, then listen with a backlog of 5. Failures of socket(), bind() and
 * listen() throw system_error.
 *
 * NOTE(review): some lines are missing from this listing (for instance the
 * declaration of on and the first statement of the bind and listen failure
 * branches); the description covers the visible statements only. */
135 static XBT_INLINE void gras_trp_sock_socket_server(gras_trp_plugin_t ignored,
138 int size = sock->buf_size;
140 struct sockaddr_in server;
142 sock->outgoing = 1; /* TCP => duplex mode */
/* NOTE(review): no zeroing of server is visible here, whereas the client
 * function clears its sockaddr_in with memset - confirm that the remaining
 * bytes of the structure do not need to be cleared. */
144 server.sin_port = htons((u_short) sock->port);
145 server.sin_addr.s_addr = INADDR_ANY;
146 server.sin_family = AF_INET;
147 if ((sock->sd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
148 THROW1(system_error, 0, "Socket allocation failed: %s",
149 sock_errstr(sock_errno));
152 (sock->sd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)))
153 THROW1(system_error, 0,
154 "setsockopt failed, cannot condition the socket: %s",
155 sock_errstr(sock_errno));
/* best effort: not being able to set the buffer sizes is only logged */
157 if (setsockopt(sock->sd, SOL_SOCKET, SO_RCVBUF,
158 (char *) &size, sizeof(size))
159 || setsockopt(sock->sd, SOL_SOCKET, SO_SNDBUF,
160 (char *) &size, sizeof(size))) {
161 VERB1("setsockopt failed, cannot set buffer size: %s",
162 sock_errstr(sock_errno));
165 if (bind(sock->sd, (struct sockaddr *) &server, sizeof(server)) == -1) {
167 THROW2(system_error, 0,
168 "Cannot bind to port %d: %s", sock->port, sock_errstr(sock_errno));
171 DEBUG2("Listen on port %d (sd=%d)", sock->port, sock->sd);
172 if (listen(sock->sd, 5) < 0) {
174 THROW2(system_error, 0,
175 "Cannot listen on port %d: %s",
176 sock->port, sock_errstr(sock_errno));
179 VERB2("Openned a server socket on port %d (sd=%d)", sock->port, sock->sd);
/* Accepts one incoming connection on the listening socket sock and builds
 * the socket object res describing it.
 *
 * Visible steps: allocate res, accept() the connection (on failure the server
 * socket is closed and system_error is thrown), enable SO_KEEPALIVE and
 * TCP_NODELAY on the new descriptor (failure throws), try to set the kernel
 * buffer sizes (failure is only logged), copy plugin, incoming and outgoing
 * from the server socket, read the port number sent by the client, derive
 * the peer name from the peer address, and register res in the sockets dynar
 * of the process data.
 *
 * NOTE(review): many lines are missing from this listing (local declarations,
 * the test of the accept result, the remaining field assignments of res and
 * the return statement); presumably res is returned - confirm. */
182 static gras_socket_t gras_trp_sock_socket_accept(gras_socket_t sock)
186 struct sockaddr_in peer_in;
187 socklen_t peer_in_len = sizeof(peer_in);
194 socklen_t s = sizeof(int);
199 gras_trp_socket_new(1, &res);
201 sd = accept(sock->sd, (struct sockaddr *) &peer_in, &peer_in_len);
/* saved at once so that later calls cannot overwrite the accept error */
202 tmp_errno = sock_errno;
205 gras_socket_close(sock);
206 THROW1(system_error, 0,
207 "Accept failed (%s). Droping server socket.",
208 sock_errstr(tmp_errno));
/* NOTE(review): tmp_errno was captured right after accept() and no later
 * assignment is visible, so the two setsockopt diagnostics below would print
 * the error state of accept() instead of the one of setsockopt(); sock_errno
 * looks like what was intended - confirm. */
211 if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, (char *) &i, s)
212 || setsockopt(sd, _gras_tcp_proto_number(), TCP_NODELAY, (char *) &i,
214 THROW1(system_error, 0,
215 "setsockopt failed, cannot condition the socket: %s",
216 sock_errstr(tmp_errno));
218 res->buf_size = sock->buf_size;
219 size = sock->buf_size;
220 if (setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *) &size, sizeof(size))
221 || setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *) &size, sizeof(size)))
222 VERB1("setsockopt failed, cannot set buffer size: %s",
223 sock_errstr(tmp_errno));
225 res->plugin = sock->plugin;
226 res->incoming = sock->incoming;
227 res->outgoing = sock->outgoing;
/* handshake: the client sends its own port as a 32 bit integer in network
 * byte order right after connecting */
232 gras_trp_tcp_recv(res, (char *) &hisport, sizeof(hisport));
233 res->peer_port = ntohl(hisport);
234 DEBUG1("peerport %d received", res->peer_port);
236 /* FIXME: Lock to protect inet_ntoa */
/* peer_name is always a strdup()ed string: either the dotted address given
 * by inet_ntoa() or the literal unknown */
237 if (((struct sockaddr *) &peer_in)->sa_family != AF_INET) {
238 res->peer_name = (char *) strdup("unknown");
240 struct in_addr addrAsInAddr;
243 addrAsInAddr.s_addr = peer_in.sin_addr.s_addr;
245 tmp = inet_ntoa(addrAsInAddr);
247 res->peer_name = (char *) strdup(tmp);
249 res->peer_name = (char *) strdup("unknown");
253 VERB3("Accepted from %s:%d (sd=%d)", res->peer_name, res->peer_port, sd);
254 xbt_dynar_push(((gras_trp_procdata_t)
255 gras_libdata_by_id(gras_trp_libdata_id))->sockets, &res);
/* Closes the TCP descriptor of sock with tcp_close() and logs a warning when
 * that call fails; the failure is not propagated to the caller.
 * NOTE(review): the test guarding the early return is missing from this
 * listing. */
261 static void gras_trp_sock_socket_close(gras_socket_t sock)
265 return; /* close only once */
267 VERB1("close tcp connection %d", sock->sd);
269 if (tcp_close(sock->sd) < 0) {
270 WARN3("error while closing tcp socket %d: %d (%s)\n",
271 sock->sd, sock_errno, sock_errstr(sock_errno));
275 /************************************/
276 /****[ end of SOCKET MANAGEMENT ]****/
277 /************************************/
280 /************************************/
281 /****[ UNBUFFERED DATA EXCHANGE ]****/
282 /************************************/
283 /* Temptation to merge this with file data exchange is great,
284 but doesn't work on BillWare (see tcp_write() in portable.h) */
/* Raw (unbuffered) send: writes size bytes starting at data to the socket
 * descriptor with tcp_write().
 *
 * Visible error handling: some errno values (EINTR, EAGAIN and, in one
 * variant, EWOULDBLOCK) are tested separately from the other write failures,
 * which throw system_error; a second THROW reports a closed descriptor.
 *
 * NOTE(review): the loop structure, the test of status and the action taken
 * on the tested errno values are missing from this listing. The two errno
 * tests are presumably alternatives selected by preprocessor lines that are
 * not visible - confirm. */
285 static XBT_INLINE void gras_trp_tcp_send(gras_socket_t sock,
287 unsigned long int size)
293 status = tcp_write(sock->sd, data, (size_t) size);
294 DEBUG3("write(%d, %p, %ld);", sock->sd, data, size);
/* NOTE(review): these tests read errno directly while the messages below use
 * sock_errno - confirm that both agree on every supported platform. */
298 if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
300 if (errno == EINTR || errno == EAGAIN)
304 THROW4(system_error, 0, "write(%d,%p,%ld) failed: %s",
305 sock->sd, data, size, sock_errstr(sock_errno));
312 THROW1(system_error, 0, "file descriptor closed (%s)",
313 sock_errstr(sock_errno));
/* Raw (unbuffered) receive into data, using tcp_read().
 *
 * size is the amount reported in the failure message and bufsize is the
 * amount requested from tcp_read(); got counts the bytes stored so far.
 * A read failure throws system_error, and so does the closing of the
 * connection by the remote side.
 *
 * NOTE(review): the data parameter line, the local declarations, the loop
 * and the return statement are missing from this listing; presumably the
 * number of bytes received is returned - confirm. */
318 static XBT_INLINE int
319 gras_trp_tcp_recv_withbuffer(gras_socket_t sock,
321 unsigned long int size,
322 unsigned long int bufsize)
/* a byte kept in sock->recvd_val is placed first in the buffer; the test
 * guarding this statement is not visible in this listing */
328 data[0] = sock->recvd_val;
337 DEBUG5("read(%d, %p, %ld) got %d so far (%s)",
338 sock->sd, data + got, bufsize, got,
339 hexa_str((unsigned char *) data, got, 0));
/* new bytes are appended after the got bytes already stored */
340 status = tcp_read(sock->sd, data + got, (size_t) bufsize);
343 THROW7(system_error, 0,
344 "read(%d,%p,%d) from %s:%d failed: %s; got %d so far", sock->sd,
345 data + got, (int) size, gras_socket_peer_name(sock),
346 gras_socket_peer_port(sock), sock_errstr(sock_errno), got);
348 DEBUG2("Got %d more bytes (%s)", status,
349 hexa_str((unsigned char *) data + got, status, 0));
/* NOTE(review): unlike the other THROW calls of this file, this one passes
 * errno as its second argument; the intent is not visible here. */
355 THROW1(system_error, errno,
356 "Socket closed by remote side (got %d bytes before this)", got);
/* Raw receive of size bytes into data: forwards to
 * gras_trp_tcp_recv_withbuffer() with the buffer size equal to the requested
 * size, and returns its result. */
363 static int gras_trp_tcp_recv(gras_socket_t sock,
364 char *data, unsigned long int size)
366 return gras_trp_tcp_recv_withbuffer(sock, data, size, size);
370 /*******************************************/
371 /****[ end of UNBUFFERED DATA EXCHANGE ]****/
372 /*******************************************/
374 /**********************************/
375 /****[ BUFFERED DATA EXCHANGE ]****/
376 /**********************************/
378 /* Make sure the data is sent */
/* Pushes out (and, for vectored input, pulls in) everything queued on sock.
 *
 * - output in buffering_buf mode: when the output buffer is not empty
 *   (size differs from pos) it is sent with gras_trp_tcp_send() and its size
 *   is reset to 0;
 * - output in buffering_iov mode: the queued iovec entries are handed to
 *   writev() in one call, then the dynar and the output buffer are reset;
 * - input in buffering_iov mode: the queued iovec entries are filled by one
 *   readv() call, then the dynar is reset.
 *
 * NOTE(review): local declarations and preprocessor lines are missing from
 * this listing. */
379 static void gras_trp_bufiov_flush(gras_socket_t sock)
385 gras_trp_bufdata_t *data = sock->bufdata;
389 if (data->out == buffering_buf) {
/* dump the pending bytes only when debug logging is enabled */
390 if (XBT_LOG_ISENABLED(gras_trp_tcp, xbt_log_priority_debug))
391 hexa_print("chunk to send ",
392 (unsigned char *) data->out_buf.data, data->out_buf.size);
393 if ((data->out_buf.size - data->out_buf.pos) != 0) {
394 DEBUG3("Send the chunk (size=%d) to %s:%d", data->out_buf.size,
395 gras_socket_peer_name(sock), gras_socket_peer_port(sock));
396 gras_trp_tcp_send(sock, data->out_buf.data, data->out_buf.size);
397 VERB1("Chunk sent (size=%d)", data->out_buf.size);
398 data->out_buf.size = 0;
402 if (data->out == buffering_iov) {
403 DEBUG0("Flush out iov");
404 vect = sock->bufdata->out_buf_v;
405 if ((size = xbt_dynar_length(vect))) {
406 DEBUG1("Flush %d chunks out of this socket", size);
/* NOTE(review): the result of writev() is ignored, so an error or a short
 * write goes unnoticed and the queued chunks are dropped anyway. */
407 writev(sock->sd, xbt_dynar_get_ptr(vect, 0), size);
408 xbt_dynar_reset(vect);
410 data->out_buf.size = 0; /* reset the buffer containing non-stable data */
413 if (data->in == buffering_iov) {
414 DEBUG0("Flush in iov");
415 vect = sock->bufdata->in_buf_v;
416 if ((size = xbt_dynar_length(vect))) {
417 DEBUG1("Get %d chunks from of this socket", size);
/* NOTE(review): the result of readv() is ignored as well, so an error or a
 * short read leaves the caller buffers partly unfilled without any report. */
418 readv(sock->sd, xbt_dynar_get_ptr(vect, 0), size);
419 xbt_dynar_reset(vect);
/* Buffered send: copies the size bytes of chunk into the output buffer of
 * the socket, piece by piece, and flushes the socket each time the buffer
 * becomes full. Bytes that do not fill the buffer stay queued until a later
 * flush.
 *
 * NOTE(review): the return type, the chunk parameter line, the local
 * declarations and the end of the function are missing from this listing;
 * the last parameter is named stable_ignored and is not used by any visible
 * statement. */
426 gras_trp_buf_send(gras_socket_t sock,
428 unsigned long int size, int stable_ignored)
431 gras_trp_bufdata_t *data = (gras_trp_bufdata_t *) sock->bufdata;
436 while (chunk_pos < size) {
437 /* size of the piece copied into the output buffer in this pass */
439 min(size - chunk_pos, data->buffsize - data->out_buf.size);
440 DEBUG4("Set the chars %d..%ld into the buffer; size=%ld, ctn=(%s)",
441 (int) data->out_buf.size,
442 ((int) data->out_buf.size) + thissize - 1, size,
443 hexa_str((unsigned char *) chunk, thissize, 0));
445 memcpy(data->out_buf.data + data->out_buf.size, chunk + chunk_pos,
448 data->out_buf.size += thissize;
449 chunk_pos += thissize;
450 DEBUG4("New pos = %d; Still to send = %ld of %ld; ctn sofar=(%s)",
451 data->out_buf.size, size - chunk_pos, size,
452 hexa_str((unsigned char *) chunk, chunk_pos, 0));
454 if (data->out_buf.size == data->buffsize) /* out of space. Flush it */
455 gras_trp_bufiov_flush(sock);
/* Buffered receive: fills chunk with size bytes taken from the input buffer
 * of the socket. Each time the buffer is exhausted (size equals pos) it is
 * refilled through gras_trp_tcp_recv_withbuffer(). Before leaving the visible
 * part, sock->moredata records whether unread bytes remain in the buffer.
 *
 * NOTE(review): the return type, some local declarations, the statement
 * receiving the result of the refill and the end of the function are missing
 * from this listing. */
462 gras_trp_buf_recv(gras_socket_t sock, char *chunk, unsigned long int size)
465 gras_trp_bufdata_t *data = sock->bufdata;
466 long int chunk_pos = 0;
470 while (chunk_pos < size) {
471 /* size of the chunk to receive in that shot */
474 if (data->in_buf.size == data->in_buf.pos) { /* out of data. Get more */
476 DEBUG2("Get more data (size=%d,bufsize=%d)",
477 (int) MIN(size - chunk_pos, data->buffsize),
478 (int) data->buffsize);
482 gras_trp_tcp_recv_withbuffer(sock, data->in_buf.data,
483 MIN(size - chunk_pos, data->buffsize),
/* freshly received data is consumed from the start of the buffer */
486 data->in_buf.pos = 0;
/* hand over what is available, without exceeding what is still wanted */
489 thissize = min(size - chunk_pos, data->in_buf.size - data->in_buf.pos);
490 memcpy(chunk + chunk_pos, data->in_buf.data + data->in_buf.pos, thissize);
492 data->in_buf.pos += thissize;
493 chunk_pos += thissize;
494 DEBUG4("New pos = %d; Still to receive = %ld of %ld. Ctn so far=(%s)",
495 data->in_buf.pos, size - chunk_pos, size,
496 hexa_str((unsigned char *) chunk, chunk_pos, 0));
498 /* tell the gras_select function that buffered data is still pending on this socket, so that it does not actually select */
499 sock->moredata = (data->in_buf.size > data->in_buf.pos);
500 DEBUG1("There is %smore data", (sock->moredata ? "" : "no "));
506 /*****************************************/
507 /****[ end of BUFFERED DATA EXCHANGE ]****/
508 /*****************************************/
510 /********************************/
511 /****[ VECTOR DATA EXCHANGE ]****/
512 /********************************/
/* Vectored send: queues chunk as one struct iovec entry of the out_buf_v
 * dynar, to be written by the writev() call of the flush function.
 *
 * Three cases are visible, following the existing comments:
 * - the storage is not stable and does not fit in the free part of the
 *   output buffer: the entry points at the caller storage and the socket is
 *   flushed at once;
 * - the storage is not stable but fits: the bytes are copied into the output
 *   buffer and the entry points at that copy;
 * - the storage is stable: the entry points at the caller storage.
 *
 * NOTE(review): the return type, the declaration of elm, the tests on stable
 * and the end of the function are missing from this listing. */
515 gras_trp_iov_send(gras_socket_t sock,
516 const char *chunk, unsigned long int size, int stable)
519 gras_trp_bufdata_t *data = (gras_trp_bufdata_t *) sock->bufdata;
522 DEBUG1("Buffer one chunk to be sent later (%s)",
523 hexa_str((char *) chunk, size, 0));
525 elm.iov_len = (size_t) size;
528 /* data storage won't last until flush. Save it in a buffer if we can */
530 if (size > data->buffsize - data->out_buf.size) {
532 flush the socket, using data in its actual storage */
533 elm.iov_base = (void *) chunk;
534 xbt_dynar_push(data->out_buf_v, &elm);
536 gras_trp_bufiov_flush(sock);
539 /* buffer big enough:
540 copy data into it, and chain it for upcoming writev */
/* the entry points into the output buffer, which the flush function resets
 * once the queued entries have been handed to writev() */
541 memcpy(data->out_buf.data + data->out_buf.size, chunk, size);
542 elm.iov_base = (void *) (data->out_buf.data + data->out_buf.size);
543 data->out_buf.size += size;
545 xbt_dynar_push(data->out_buf_v, &elm);
549 /* data storage stable. Chain it */
551 elm.iov_base = (void *) chunk;
552 xbt_dynar_push(data->out_buf_v, &elm);
/* Vectored receive: does not read anything by itself. It queues chunk and
 * size as one struct iovec entry of the in_buf_v dynar; the bytes are only
 * stored when the flush function calls readv().
 * NOTE(review): the return type, the declaration of elm and the end of the
 * function are missing from this listing. */
557 gras_trp_iov_recv(gras_socket_t sock, char *chunk, unsigned long int size)
561 DEBUG0("Buffer one chunk to be received later");
562 elm.iov_base = (void *) chunk;
563 elm.iov_len = (size_t) size;
564 xbt_dynar_push(sock->bufdata->in_buf_v, &elm);
570 /***************************************/
571 /****[ end of VECTOR DATA EXCHANGE ]****/
572 /***************************************/
576 *** Prototypes of BUFFERED
/* Entry points of the buffered layer; gras_trp_tcp_setup() stores them in
 * the plugin structure. Each one wraps the plain socket function of the same
 * role defined earlier in this file. */
579 void gras_trp_buf_socket_client(gras_trp_plugin_t self, gras_socket_t sock);
580 void gras_trp_buf_socket_server(gras_trp_plugin_t self, gras_socket_t sock);
581 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock);
583 void gras_trp_buf_socket_close(gras_socket_t sd);
/* Allocates the buffering state of sock and attaches it as sock->bufdata:
 * an input and an output buffer of 100 * 1024 bytes each, both empty, the
 * two dynars of struct iovec used by the vectored mode, and the buffering
 * kind of each direction.
 *
 * NOTE(review): the two assignments to data->out are presumably alternatives
 * selected by preprocessor lines missing from this listing, and the return
 * statement is missing too; presumably sock is returned, since the accept
 * wrapper returns the result of this function - confirm. */
586 gras_socket_t gras_trp_buf_init_sock(gras_socket_t sock)
588 gras_trp_bufdata_t *data = xbt_new(gras_trp_bufdata_t, 1);
590 data->buffsize = 100 * 1024; /* 100k */
592 data->in_buf.size = 0;
593 data->in_buf.data = xbt_malloc(data->buffsize);
594 data->in_buf.pos = 0; /* useless, indeed, since size==pos */
596 data->out_buf.size = 0;
597 data->out_buf.data = xbt_malloc(data->buffsize);
/* size equal to pos is how the flush and close code recognise an empty
 * output buffer */
598 data->out_buf.pos = data->out_buf.size;
601 data->in_buf_v = data->out_buf_v = NULL;
602 data->in_buf_v = xbt_dynar_new(sizeof(struct iovec), NULL);
603 data->out_buf_v = xbt_dynar_new(sizeof(struct iovec), NULL);
604 data->out = buffering_iov;
606 data->out = buffering_buf;
/* input always goes through the plain buffer */
609 data->in = buffering_buf;
611 sock->bufdata = data;
/* Fills the function table of the plugin plug with the TCP implementation:
 * the buffered socket management functions, the buffered send and receive,
 * the raw send and receive, and the flush function.
 * NOTE(review): the two assignments to plug->send are presumably
 * alternatives selected by preprocessor lines missing from this listing -
 * confirm. */
618 void gras_trp_tcp_setup(gras_trp_plugin_t plug)
621 plug->socket_client = gras_trp_buf_socket_client;
622 plug->socket_server = gras_trp_buf_socket_server;
623 plug->socket_accept = gras_trp_buf_socket_accept;
624 plug->socket_close = gras_trp_buf_socket_close;
627 plug->send = gras_trp_iov_send;
629 plug->send = gras_trp_buf_send;
631 plug->recv = gras_trp_buf_recv;
/* raw variants bypass the buffers */
633 plug->raw_send = gras_trp_tcp_send;
634 plug->raw_recv = gras_trp_tcp_recv;
636 plug->flush = gras_trp_bufiov_flush;
/* Buffered client socket: connects sock with the plain TCP client function,
 * then attaches the buffering state. The self argument is not used by the
 * visible statements; NULL is passed as plugin to the plain function. */
642 void gras_trp_buf_socket_client(gras_trp_plugin_t self,
643 /* OUT */ gras_socket_t sock)
646 gras_trp_sock_socket_client(NULL, sock);
647 gras_trp_buf_init_sock(sock);
651 * gras_trp_buf_socket_server:
653 * Open a socket used to receive messages.
/* Buffered server socket: opens the listening socket with the plain TCP
 * server function, then attaches the buffering state. The self argument is
 * not used by the visible statements; NULL is passed as plugin to the plain
 * function. */
655 void gras_trp_buf_socket_server(gras_trp_plugin_t self,
656 /* OUT */ gras_socket_t sock)
659 gras_trp_sock_socket_server(NULL, sock);
660 gras_trp_buf_init_sock(sock);
/* Buffered accept: accepts a connection on sock with the plain TCP accept
 * function, attaches the buffering state to the resulting socket and returns
 * what gras_trp_buf_init_sock() returns. */
663 gras_socket_t gras_trp_buf_socket_accept(gras_socket_t sock)
665 return gras_trp_buf_init_sock(gras_trp_sock_socket_accept(sock));
/* Releases the buffering state of sock, then closes the socket itself.
 *
 * Visible steps: warn when unread bytes remain in the input buffer and free
 * it; flush the socket when the output buffer is not empty and free it; warn
 * when vectored reads are still queued and free that dynar; flush when
 * vectored writes are still queued and free that dynar; finally call the
 * plain TCP close function.
 *
 * NOTE(review): some lines are missing from this listing between the last
 * dynar release and the final close, so whether the bufdata structure itself
 * is released cannot be told from here. */
668 void gras_trp_buf_socket_close(gras_socket_t sock)
670 gras_trp_bufdata_t *data = sock->bufdata;
672 if (data->in_buf.size != data->in_buf.pos) {
673 WARN3("Socket closed, but %d bytes were unread (size=%d,pos=%d)",
674 data->in_buf.size - data->in_buf.pos,
675 data->in_buf.size, data->in_buf.pos);
677 if (data->in_buf.data)
678 free(data->in_buf.data);
/* pending output must leave before its buffer is released */
680 if (data->out_buf.size != data->out_buf.pos) {
681 DEBUG2("Flush the socket before closing (in=%d,out=%d)",
682 data->in_buf.size, data->out_buf.size);
683 gras_trp_bufiov_flush(sock);
685 if (data->out_buf.data)
686 free(data->out_buf.data);
689 if (data->in_buf_v) {
690 if (xbt_dynar_length(data->in_buf_v))
691 WARN0("Socket closed, but some bytes were unread");
692 xbt_dynar_free(&data->in_buf_v);
694 if (data->out_buf_v) {
695 if (xbt_dynar_length(data->out_buf_v)) {
696 DEBUG0("Flush the socket before closing");
697 gras_trp_bufiov_flush(sock);
699 xbt_dynar_free(&data->out_buf_v);
704 gras_trp_sock_socket_close(sock);
707 /****************************/
708 /****[ HELPER FUNCTIONS ]****/
709 /****************************/
712 * Returns the tcp protocol number from the network protocol data base.
714 * getprotobyname() is not thread safe. We need to lock it.
/* Looks up the protocol number of tcp with getprotobyname() and caches it in
 * a static variable, so the lookup is only done while the cached value is
 * still 0. A NULL lookup result trips xbt_assert0.
 * NOTE(review): the return statement is missing from this listing;
 * presumably the cached value is returned - confirm. */
716 static int _gras_tcp_proto_number(void)
718 struct protoent *fetchedEntry;
/* 0 means that the lookup has not been done yet */
719 static int returnValue = 0;
721 if (returnValue == 0) {
722 fetchedEntry = getprotobyname("tcp");
723 xbt_assert0(fetchedEntry, "getprotobyname(tcp) gave NULL");
724 returnValue = fetchedEntry->p_proto;
/* Winsock only: gras_wsa_err2string() maps a WSA error code to the spelling
 * of its constant, and returns a fixed fallback string for any code that is
 * not listed.
 * NOTE(review): the switch statement, several cases and the opening of the
 * inner preprocessor section are missing from this listing. */
730 #ifdef HAVE_WINSOCK_H
/* one switch case per constant: returns the constant name as a string */
731 #define RETSTR( x ) case x: return #x
733 const char *gras_wsa_err2string(int err)
742 RETSTR(WSAEWOULDBLOCK);
743 RETSTR(WSAEINPROGRESS);
746 RETSTR(WSAEDESTADDRREQ);
748 RETSTR(WSAEPROTOTYPE);
749 RETSTR(WSAENOPROTOOPT);
750 RETSTR(WSAEPROTONOSUPPORT);
751 RETSTR(WSAESOCKTNOSUPPORT);
752 RETSTR(WSAEOPNOTSUPP);
753 RETSTR(WSAEPFNOSUPPORT);
754 RETSTR(WSAEAFNOSUPPORT);
755 RETSTR(WSAEADDRINUSE);
756 RETSTR(WSAEADDRNOTAVAIL);
758 RETSTR(WSAENETUNREACH);
759 RETSTR(WSAENETRESET);
760 RETSTR(WSAECONNABORTED);
761 RETSTR(WSAECONNRESET);
765 RETSTR(WSAESHUTDOWN);
766 RETSTR(WSAETOOMANYREFS);
767 RETSTR(WSAETIMEDOUT);
768 RETSTR(WSAECONNREFUSED);
770 RETSTR(WSAENAMETOOLONG);
771 RETSTR(WSAEHOSTDOWN);
772 RETSTR(WSAEHOSTUNREACH);
773 RETSTR(WSAENOTEMPTY);
779 RETSTR(WSASYSNOTREADY);
780 RETSTR(WSAVERNOTSUPPORTED);
781 RETSTR(WSANOTINITIALISED);
786 RETSTR(WSAECANCELLED);
787 RETSTR(WSAEINVALIDPROCTABLE);
788 RETSTR(WSAEINVALIDPROVIDER);
789 RETSTR(WSASYSCALLFAILURE);
790 RETSTR(WSASERVICE_NOT_FOUND);
791 RETSTR(WSATYPE_NOT_FOUND);
792 RETSTR(WSA_E_NO_MORE);
793 RETSTR(WSA_E_CANCELLED);
795 #endif /* HAVE_WINSOCK2 */
797 RETSTR(WSAHOST_NOT_FOUND);
798 RETSTR(WSATRY_AGAIN);
799 RETSTR(WSANO_RECOVERY);
/* fallback for every code without a case above */
802 return "unknown WSA error";
804 #endif /* HAVE_WINSOCK_H */
806 /***********************************/
807 /****[ end of HELPER FUNCTIONS ]****/
808 /***********************************/