From patchwork Thu Aug 16 19:24:23 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Hefty, Sean" X-Patchwork-Id: 1334901 Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork1.kernel.org (Postfix) with ESMTP id 84E6540B17 for ; Thu, 16 Aug 2012 19:24:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753374Ab2HPTY3 (ORCPT ); Thu, 16 Aug 2012 15:24:29 -0400 Received: from mga02.intel.com ([134.134.136.20]:6440 "EHLO mga02.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753343Ab2HPTYZ convert rfc822-to-8bit (ORCPT ); Thu, 16 Aug 2012 15:24:25 -0400 Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga101.jf.intel.com with ESMTP; 16 Aug 2012 12:24:24 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.77,780,1336374000"; d="scan'208";a="187368122" Received: from orsmsx604.amr.corp.intel.com ([10.22.226.87]) by orsmga002.jf.intel.com with ESMTP; 16 Aug 2012 12:24:24 -0700 Received: from orsmsx106.amr.corp.intel.com (10.22.225.133) by orsmsx604.amr.corp.intel.com (10.22.226.87) with Microsoft SMTP Server (TLS) id 8.2.255.0; Thu, 16 Aug 2012 12:24:24 -0700 Received: from orsmsx101.amr.corp.intel.com ([169.254.8.152]) by ORSMSX106.amr.corp.intel.com ([169.254.5.217]) with mapi id 14.01.0355.002; Thu, 16 Aug 2012 12:24:23 -0700 From: "Hefty, Sean" To: "linux-rdma (linux-rdma@vger.kernel.org)" Subject: [PATCH 5/7] librdmacm/rspreload: Do not block connect when supporting fork Thread-Topic: [PATCH 5/7] librdmacm/rspreload: Do not block connect when supporting fork Thread-Index: Ac175Gob+4sSXh2cSLGCZK3VQjLByw== Date: Thu, 16 Aug 2012 19:24:23 +0000 Message-ID: <1828884A29C6694DAF28B7E6B8A8237346A8997B@ORSMSX101.amr.corp.intel.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.22.254.139] MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Many FTP servers require fork support. However, FTP clients, such as ncftp, will perform the following call sequence: send PASV request to server over connection 1 server will listen for connection 2 issue nonblocking connect to server send ACCEPT request to server over connection 1 server will accept connection 2 The current fork support converts all nonblocking connect calls to blocking. The result is that the FTP client ends up blocked waiting for the server to accept the connection, which it will never do. To handle this case, we have the active side follow the same rule as the server side and defer establishing the rsocket connection until the user calls the first data transfer routine. Signed-off-by: Sean Hefty --- src/preload.c | 141 +++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 96 insertions(+), 45 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/src/preload.c b/src/preload.c index bb8e3fb..8b86415 100644 --- a/src/preload.c +++ b/src/preload.c @@ -99,12 +99,20 @@ static int fork_support; enum fd_type { fd_normal, - fd_rsocket, - fd_fork + fd_rsocket +}; + +enum fd_fork_state { + fd_ready, + fd_fork, + fd_fork_listen, + fd_fork_active, + fd_fork_passive }; struct fd_info { enum fd_type type; + enum fd_fork_state state; int fd; int dupfd; atomic_t refcnt; @@ -143,13 +151,14 @@ err1: return ret; } -static void fd_store(int index, int fd, enum fd_type type) +static void fd_store(int index, int fd, enum fd_type type, enum fd_fork_state state) { struct fd_info *fdi; fdi = idm_at(&idm, index); fdi->fd = fd; fdi->type = type; + fdi->state = state; } static inline enum fd_type fd_get(int index, int *fd) @@ -175,6 +184,14 @@ static inline int fd_getd(int index) return fdi ? fdi->fd : index; } +static inline enum fd_fork_state fd_gets(int index) +{ + struct fd_info *fdi; + + fdi = idm_lookup(&idm, index); + return fdi ? fdi->state : fd_ready; +} + static inline enum fd_type fd_gett(int index) { struct fd_info *fdi; @@ -353,7 +370,7 @@ static int transpose_socket(int socket, enum fd_type new_type) if (ret) goto err; - fd_store(socket, dfd, new_type); + fd_store(socket, dfd, new_type, fd_ready); return dfd; err: @@ -398,9 +415,9 @@ int socket(int domain, int type, int protocol) ret = real.socket(domain, type, protocol); if (ret < 0) return ret; - fd_store(index, ret, fd_fork); + fd_store(index, ret, fd_normal, fd_fork); } else { - fd_store(index, ret, fd_rsocket); + fd_store(index, ret, fd_rsocket, fd_ready); set_rsocket_options(ret); } return index; @@ -419,30 +436,46 @@ int bind(int socket, const struct sockaddr *addr, socklen_t addrlen) int listen(int socket, int backlog) { - int fd; - return (fd_get(socket, &fd) == fd_rsocket) ? - rlisten(fd, backlog) : real.listen(fd, backlog); + int fd, ret; + if (fd_get(socket, &fd) == fd_rsocket) { + ret = rlisten(fd, backlog); + } else { + ret = real.listen(fd, backlog); + if (!ret && fd_gets(socket) == fd_fork) + fd_store(socket, fd, fd_normal, fd_fork_listen); + } + return ret; } int accept(int socket, struct sockaddr *addr, socklen_t *addrlen) { int fd, index, ret; - enum fd_type type; - type = fd_get(socket, &fd); - if (type == fd_rsocket || type == fd_fork) { + if (fd_get(socket, &fd) == fd_rsocket) { index = fd_open(); if (index < 0) return index; - ret = (type == fd_rsocket) ? raccept(fd, addr, addrlen) : - real.accept(fd, addr, addrlen); + ret = raccept(fd, addr, addrlen); if (ret < 0) { fd_close(index, &fd); return ret; } - fd_store(index, ret, type); + fd_store(index, ret, fd_rsocket, fd_ready); + return index; + } else if (fd_gets(socket) == fd_fork_listen) { + index = fd_open(); + if (index < 0) + return index; + + ret = real.accept(fd, addr, addrlen); + if (ret < 0) { + fd_close(index, &fd); + return ret; + } + + fd_store(index, ret, fd_normal, fd_fork_passive); return index; } else { return real.accept(fd, addr, addrlen); @@ -453,37 +486,57 @@ int accept(int socket, struct sockaddr *addr, socklen_t *addrlen) * We can't fork RDMA connections and pass them from the parent to the child * process. Instead, we need to establish the RDMA connection after calling * fork. To do this, we delay establishing the RDMA connection until we try - * to send/receive on the server side. On the client side, we don't expect - * to fork, so we switch from a TCP connection to an rsocket when connecting. + * to send/receive on the server side. */ -static int fork_active(int socket, const struct sockaddr *addr, socklen_t addrlen) +static void fork_active(int socket) { - int fd, ret; + struct sockaddr_storage addr; + int sfd, dfd, ret; + socklen_t len; uint32_t msg; long flags; - fd = fd_getd(socket); - flags = real.fcntl(fd, F_GETFL); - real.fcntl(fd, F_SETFL, 0); - ret = real.connect(fd, addr, addrlen); + sfd = fd_getd(socket); + + len = sizeof addr; + ret = real.getpeername(sfd, (struct sockaddr *) &addr, &len); if (ret) - return ret; + goto err1; - ret = real.recv(fd, &msg, sizeof msg, MSG_PEEK); - if ((ret != sizeof msg) || msg) { - fd_store(socket, fd, fd_normal); - return 0; - } + dfd = rsocket(addr.ss_family, SOCK_STREAM, 0); + if (dfd < 0) + goto err1; - real.fcntl(fd, F_SETFL, flags); - ret = transpose_socket(socket, fd_rsocket); - if (ret < 0) - return ret; + flags = real.fcntl(sfd, F_GETFL); + real.fcntl(sfd, F_SETFL, 0); + ret = real.recv(sfd, &msg, sizeof msg, MSG_PEEK); + real.fcntl(sfd, F_SETFL, flags); + if ((ret != sizeof msg) || msg) + goto err2; + + ret = rconnect(ret, (struct sockaddr *) &addr, len); + if (ret) + goto err2; - real.close(fd); - return rconnect(ret, addr, addrlen); + set_rsocket_options(dfd); + copysockopts(dfd, sfd, &rs, &real); + real.shutdown(sfd, SHUT_RDWR); + real.close(sfd); + fd_store(socket, dfd, fd_rsocket, fd_ready); + return; + +err2: + rclose(dfd); +err1: + fd_store(socket, sfd, fd_normal, fd_ready); } +/* + * The server will start listening for the new connection, then send a + * message to the active side when the listen is ready. This does leave + * fork unsupported in the following case: the server is nonblocking and + * calls select/poll waiting to receive data from the client. + */ static void fork_passive(int socket) { struct sockaddr_in6 sin6; @@ -541,7 +594,7 @@ static void fork_passive(int socket) copysockopts(dfd, sfd, &rs, &real); real.shutdown(sfd, SHUT_RDWR); real.close(sfd); - fd_store(socket, dfd, fd_rsocket); + fd_store(socket, dfd, fd_rsocket, fd_ready); lclose: rclose(lfd); @@ -550,7 +603,7 @@ sclose: sem_close(sem); out: if (ret) - fd_store(socket, sfd, fd_normal); + fd_store(socket, sfd, fd_normal, fd_ready); } static inline enum fd_type fd_fork_get(int index, int *fd) @@ -559,8 +612,10 @@ static inline enum fd_type fd_fork_get(int index, int *fd) fdi = idm_lookup(&idm, index); if (fdi) { - if (fdi->type == fd_fork) + if (fdi->type == fd_fork_passive) fork_passive(index); + else if (fdi->type == fd_fork_active) + fork_active(index); *fd = fdi->fd; return fdi->type; @@ -574,10 +629,7 @@ int connect(int socket, const struct sockaddr *addr, socklen_t addrlen) { int fd, ret; - switch (fd_get(socket, &fd)) { - case fd_fork: - return fork_active(socket, addr, addrlen); - case fd_rsocket: + if (fd_get(socket, &fd) == fd_rsocket) { ret = rconnect(fd, addr, addrlen); if (!ret || errno == EINPROGRESS) return ret; @@ -588,9 +640,8 @@ int connect(int socket, const struct sockaddr *addr, socklen_t addrlen) rclose(fd); fd = ret; - break; - default: - break; + } else if (fd_gets(socket) == fd_fork) { + fd_store(socket, fd, fd_normal, fd_fork_active); } return real.connect(fd, addr, addrlen);