From patchwork Thu Dec 2 19:41:35 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jim Schutt X-Patchwork-Id: 375681 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oB2JfnDZ025820 for ; Thu, 2 Dec 2010 19:41:49 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755378Ab0LBTlr (ORCPT ); Thu, 2 Dec 2010 14:41:47 -0500 Received: from sentry-three.sandia.gov ([132.175.109.17]:51988 "EHLO sentry-three.sandia.gov" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753792Ab0LBTlr (ORCPT ); Thu, 2 Dec 2010 14:41:47 -0500 X-WSS-ID: 0LCTG1I-0C-8TT-02 X-M-MSG: Received: from sentry.sandia.gov (sentry.sandia.gov [132.175.109.20]) by sentry-three.sandia.gov (Postfix) with ESMTP id 1C4AA478DA3 for ; Thu, 2 Dec 2010 12:41:42 -0700 (MST) Received: from [132.175.109.1] by sentry.sandia.gov with ESMTP (SMTP Relay 01 (Email Firewall v6.3.2)); Thu, 02 Dec 2010 12:41:38 -0700 X-Server-Uuid: AF72F651-81B1-4134-BA8C-A8E1A4E620FF Received: from localhost.localdomain (skynetcore1.sandia.gov [134.253.138.22]) by mailgate.sandia.gov (8.14.4/8.14.4) with ESMTP id oB2JfRgw027231; Thu, 2 Dec 2010 12:41:27 -0700 From: "Jim Schutt" To: ceph-devel@vger.kernel.org cc: "Jim Schutt" Subject: [PATCH] msgr: Correctly handle half-open connections. Date: Thu, 2 Dec 2010 12:41:35 -0700 Message-ID: <1291318895-11879-1-git-send-email-jaschut@sandia.gov> X-Mailer: git-send-email 1.6.6 X-PMX-Version: 5.6.0.2009776, Antispam-Engine: 2.7.2.376379, Antispam-Data: 2010.12.2.193323 X-PMX-Spam: Gauge=IIIIIIII, Probability=8%, Report=' BODY_SIZE_6000_6999 0, BODY_SIZE_7000_LESS 0, DATE_TZ_NA 0, __HAS_MSGID 0, __HAS_X_MAILER 0, __LINES_OF_YELLING 0, __MIME_TEXT_ONLY 0, __SANE_MSGID 0, __TO_MALFORMED_2 0, __TO_NO_NAME 0, __URI_NO_PATH 0, __URI_NO_WWW 0, __URI_NS ' X-TMWD-Spam-Summary: TS=20101202194139; ID=1; SEV=2.3.1; DFV=B2010120219; IFV=NA; AIF=B2010120219; RPD=5.03.0010; ENG=NA; RPDID=7374723D303030312E30413031303230332E34434637463637332E303043453A534346535441543838363133332C73733D312C6667733D30; CAT=NONE; CON=NONE; SIG=AAAAAAAAAAAAAAAAAAAAAAAAfQ== X-MMS-Spam-Filter-ID: B2010120219_5.03.0010 MIME-Version: 1.0 X-WSS-ID: 60E929F84KO3973799-01-01 Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Thu, 02 Dec 2010 19:41:50 +0000 (UTC) diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index 97ed4c3..0c9a0bd 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -1875,7 +1875,7 @@ int SimpleMessenger::Pipe::read_message(Message **pm) while (left > 0) { // wait for data - if (tcp_wait(sd, messenger->timeout) < 0) + if (tcp_read_wait(sd, messenger->timeout) < 0) goto out_dethrottle; // get a buffer diff --git a/src/msg/tcp.cc b/src/msg/tcp.cc index c1be756..71d85f1 100644 --- a/src/msg/tcp.cc +++ b/src/msg/tcp.cc @@ -13,9 +13,6 @@ int tcp_read(int sd, char *buf, int len, int timeout) if (sd < 0) return -1; - struct pollfd pfd; - pfd.fd = sd; - pfd.events = POLLIN | POLLHUP | POLLRDHUP | POLLNVAL | POLLERR; while (len > 0) { if (g_conf.ms_inject_socket_failures && sd >= 0) { @@ -25,24 +22,14 @@ int tcp_read(int sd, char *buf, int len, int timeout) } } - if (poll(&pfd, 1, timeout) <= 0) + if (tcp_read_wait(sd, timeout) < 0) return -1; - if (!(pfd.revents & POLLIN)) - return -1; + int got = tcp_read_nonblocking(sd, buf, len); - /* - * although we turn on the MSG_DONTWAIT flag, we don't expect - * receivng an EAGAIN, as we polled on the socket, so there - * should be data waiting for us. - */ - int got = ::recv( sd, buf, len, MSG_DONTWAIT ); - if (got <= 0) { - //char buf[100]; - //generic_dout(0) << "tcp_read socket " << sd << " returned " << got - //<< " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl; + if (got < 0) return -1; - } + len -= got; buf += got; //generic_dout(DBL) << "tcp_read got " << got << ", " << len << " left" << dendl; @@ -50,26 +37,51 @@ int tcp_read(int sd, char *buf, int len, int timeout) return len; } -int tcp_wait(int sd, int timeout) +int tcp_read_wait(int sd, int timeout) { if (sd < 0) return -1; struct pollfd pfd; pfd.fd = sd; - pfd.events = POLLIN | POLLHUP | POLLRDHUP | POLLNVAL | POLLERR; + pfd.events = POLLIN | POLLRDHUP; if (poll(&pfd, 1, timeout) <= 0) return -1; + if (pfd.revents & (POLLERR | POLLHUP | POLLRDHUP | POLLNVAL)) + return -1; + if (!(pfd.revents & POLLIN)) return -1; return 0; } +/* This function can only be called if poll/select says there is + * data available. Otherwise we cannot properly interpret a + * read of 0 bytes. + */ int tcp_read_nonblocking(int sd, char *buf, int len) { - return ::recv(sd, buf, len, MSG_DONTWAIT); +again: + int got = ::recv( sd, buf, len, MSG_DONTWAIT ); + if (got < 0) { + if (errno == EAGAIN || errno == EINTR) { + goto again; + } else { + char buf[100]; + generic_dout(10) << "tcp_read_nonblocking socket " << sd << " returned " + << got << " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl; + return -1; + } + } else if (got == 0) { + /* poll() said there was data, but we didn't read any - peer + * sent a FIN. Maybe POLLRDHUP signals this, but this is + * standard socket behavior as documented by Stevens. + */ + return -1; + } + return got; } int tcp_write(int sd, const char *buf, int len) diff --git a/src/msg/tcp.h b/src/msg/tcp.h index 31ae967..bccdbda 100644 --- a/src/msg/tcp.h +++ b/src/msg/tcp.h @@ -26,7 +26,7 @@ inline ostream& operator<<(ostream& out, const sockaddr_storage &ss) } extern int tcp_read(int sd, char *buf, int len, int timeout=-1); -extern int tcp_wait(int sd, int timeout); +extern int tcp_read_wait(int sd, int timeout); extern int tcp_read_nonblocking(int sd, char *buf, int len); extern int tcp_write(int sd, const char *buf, int len);