Message ID: 20210811000818.95985-1-dai.ngo@oracle.com (mailing list archive)
State:      New, archived
Series:     [v2,1/1] Fix DoS vulnerability in libtirpc
> On Aug 10, 2021, at 8:08 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>
> Currently svc_run() does not handle poll timeouts, and rendezvous_request()
> does not handle the EMFILE error returned from accept(2) as it used to.
> These two pieces of functionality were removed by commit b2c9430f46c4.
>
> The effect of not handling poll timeouts is that idle TCP connections
> remain ESTABLISHED indefinitely. When the number of connections reaches
> the limit on open file descriptors (ulimit -n), accept(2) fails with
> EMFILE. Since the EMFILE error is not handled, svc_run() gets stuck in
> a tight loop calling accept(2). As a result, the RPC service running
> under svc_run() is effectively down: it can no longer service any
> requests.
>
> The problem can be reproduced with this script:
>
> if [ $# -ne 3 ]; then
>     echo "$0: server dst_port conn_cnt"
>     exit
> fi
> server=$1
> dport=$2
> conn_cnt=$3
> echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"
>
> pcnt=0
> while [ $pcnt -lt $conn_cnt ]
> do
>     nc -v --recv-only $server $dport &
>     pcnt=`expr $pcnt + 1`
> done
>
> The RPC services rpcbind, statd and mountd are affected by this
> problem.
>
> Fix by enhancing rendezvous_request() to keep the number of SVCXPRT
> connections to no more than 4/5 of the size of the file descriptor
> table. When this threshold is reached, it destroys the idle TCP
> connections, or destroys the least active connection if no idle
> connection is found.
>
> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
> Signed-off-by: dai.ngo@oracle.com

Thanks, Dai, this version makes me feel a lot better.

I didn't look too closely at the new __svc_destroy_idle()
function. I know you based it on __svc_clean_idle(), but
I wonder if we have any regression tests in this area.

> ---
>  src/svc.c    | 17 +++++++++++++-
>  src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/src/svc.c b/src/svc.c
> index 6db164bbd76b..3a8709fe375c 100644
> --- a/src/svc.c
> +++ b/src/svc.c
> @@ -57,7 +57,7 @@
>
>  #define max(a, b) (a > b ? a : b)
>
> -static SVCXPRT **__svc_xports;
> +SVCXPRT **__svc_xports;
>  int __svc_maxrec;
>
>  /*
> @@ -194,6 +194,21 @@ __xprt_do_unregister (xprt, dolock)
>    rwlock_unlock (&svc_fd_lock);
>  }
>
> +int
> +svc_open_fds()
> +{
> +    int ix;
> +    int nfds = 0;
> +
> +    rwlock_rdlock (&svc_fd_lock);
> +    for (ix = 0; ix < svc_max_pollfd; ++ix) {
> +        if (svc_pollfd[ix].fd != -1)
> +            nfds++;
> +    }
> +    rwlock_unlock (&svc_fd_lock);
> +    return (nfds);
> +}
> +
>  /*
>   * Add a service program to the callout list.
>   * The dispatch routine will be called when a rpc request for this
> diff --git a/src/svc_vc.c b/src/svc_vc.c
> index f1d9f001fcdc..3dc8a75787e1 100644
> --- a/src/svc_vc.c
> +++ b/src/svc_vc.c
> @@ -64,6 +64,8 @@
>
>
>  extern rwlock_t svc_fd_lock;
> +extern SVCXPRT **__svc_xports;
> +extern int svc_open_fds();
>
>  static SVCXPRT *makefd_xprt(int, u_int, u_int);
>  static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
> @@ -82,6 +84,7 @@ static void svc_vc_ops(SVCXPRT *);
>  static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
>  static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
>                       void *in);
> +static int __svc_destroy_idle(int timeout);
>
>  struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
>      u_int sendsize;
> @@ -313,13 +316,14 @@ done:
>      return (xprt);
>  }
>
> +
>  /*ARGSUSED*/
>  static bool_t
>  rendezvous_request(xprt, msg)
>      SVCXPRT *xprt;
>      struct rpc_msg *msg;
>  {
> -    int sock, flags;
> +    int sock, flags, nfds, cnt;
>      struct cf_rendezvous *r;
>      struct cf_conn *cd;
>      struct sockaddr_storage addr;
> @@ -379,6 +383,16 @@ again:
>
>      gettimeofday(&cd->last_recv_time, NULL);
>
> +    nfds = svc_open_fds();
> +    if (nfds >= (_rpc_dtablesize() / 5) * 4) {
> +        /* destroy idle connections */
> +        cnt = __svc_destroy_idle(15);
> +        if (cnt == 0) {
> +            /* destroy least active */
> +            __svc_destroy_idle(0);
> +        }
> +    }
> +
>      return (FALSE); /* there is never an rpc msg to be processed */
>  }
>
> @@ -820,3 +834,49 @@ __svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
>  {
>      return FALSE;
>  }
> +
> +static int
> +__svc_destroy_idle(int timeout)
> +{
> +    int i, ncleaned = 0;
> +    SVCXPRT *xprt, *least_active;
> +    struct timeval tv, tdiff, tmax;
> +    struct cf_conn *cd;
> +
> +    gettimeofday(&tv, NULL);
> +    tmax.tv_sec = tmax.tv_usec = 0;
> +    least_active = NULL;
> +    rwlock_wrlock(&svc_fd_lock);
> +
> +    for (i = 0; i <= svc_max_pollfd; i++) {
> +        if (svc_pollfd[i].fd == -1)
> +            continue;
> +        xprt = __svc_xports[i];
> +        if (xprt == NULL || xprt->xp_ops == NULL ||
> +            xprt->xp_ops->xp_recv != svc_vc_recv)
> +            continue;
> +        cd = (struct cf_conn *)xprt->xp_p1;
> +        if (!cd->nonblock)
> +            continue;
> +        if (timeout == 0) {
> +            timersub(&tv, &cd->last_recv_time, &tdiff);
> +            if (timercmp(&tdiff, &tmax, >)) {
> +                tmax = tdiff;
> +                least_active = xprt;
> +            }
> +            continue;
> +        }
> +        if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
> +            __xprt_unregister_unlocked(xprt);
> +            __svc_vc_dodestroy(xprt);
> +            ncleaned++;
> +        }
> +    }
> +    if (timeout == 0 && least_active != NULL) {
> +        __xprt_unregister_unlocked(least_active);
> +        __svc_vc_dodestroy(least_active);
> +        ncleaned++;
> +    }
> +    rwlock_unlock(&svc_fd_lock);
> +    return (ncleaned);
> +}
> --
> 2.26.2

--
Chuck Lever
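[The 4/5 trigger in rendezvous_request() is easy to sanity-check by hand. Below is a minimal standalone sketch, not libtirpc code, of the same arithmetic; it assumes _rpc_dtablesize() tracks the RLIMIT_NOFILE soft limit, which is only an approximation of the real cached value.]

/*
 * Standalone sketch of the cleanup trigger used in rendezvous_request().
 * _rpc_dtablesize() is approximated with the RLIMIT_NOFILE soft limit;
 * the real libtirpc function caches a similar per-process value.
 */
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;
    long dtablesize, threshold;

    if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
        perror("getrlimit");
        return 1;
    }
    dtablesize = (long)rl.rlim_cur;

    /* same integer arithmetic as the patch: (n / 5) * 4 */
    threshold = (dtablesize / 5) * 4;

    printf("fd table size: %ld\n", dtablesize);
    printf("idle-connection cleanup triggers at >= %ld open fds\n", threshold);
    /* e.g. with ulimit -n 1024: (1024 / 5) * 4 = 816 */
    return 0;
}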
On 8/11/21 12:43 PM, Chuck Lever III wrote:
> [... quoted patch trimmed; see the full text above ...]
>
> Thanks, Dai, this version makes me feel a lot better.
>
> I didn't look too closely at the new __svc_destroy_idle()
> function. I know you based it on __svc_clean_idle(), but
> I wonder if we have any regression tests in this area.

Thank you Chuck for your review.

So far I used the 'nc' tool to verify that rpcbind, statd and mountd
no longer have this problem. I also verified that NFSv3 and NFSv4.x
mounts continued to work while the tests were running. I don't know of
any tests specifically designed for this, but I will look around to see
if we can use some NFS tests that would exercise these services.

-Dai
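[The nc loop in the patch description is the simplest reproducer. For environments whose nc lacks --recv-only, a rough C equivalent is sketched below; this is a hypothetical helper, not something from this thread: it opens conn_cnt TCP connections to the service and simply holds them.]

/*
 * Minimal C equivalent of the nc repro script: open many TCP
 * connections to addr:port and hold them, so the server's fd
 * table fills up. Usage: ./flood <ipv4-addr> <port> <conn_cnt>
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    struct sockaddr_in sin;
    int i, s, cnt;

    if (argc != 4) {
        fprintf(stderr, "usage: %s addr port conn_cnt\n", argv[0]);
        return 1;
    }
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_port = htons((unsigned short)atoi(argv[2]));
    if (inet_pton(AF_INET, argv[1], &sin.sin_addr) != 1) {
        fprintf(stderr, "bad address: %s\n", argv[1]);
        return 1;
    }
    cnt = atoi(argv[3]);
    for (i = 0; i < cnt; i++) {
        s = socket(AF_INET, SOCK_STREAM, 0);
        if (s < 0 || connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
            perror("connect");
            break;
        }
        /* deliberately keep s open: the connection stays ESTABLISHED */
    }
    printf("opened %d connections, holding them; kill me to release\n", i);
    pause();
    return 0;
}

[Run against, e.g., rpcbind's port 111 with a conn_cnt above the server's ulimit -n to reproduce the EMFILE loop on an unpatched libtirpc.]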
Hi Chuck,

On 8/11/21 2:16 PM, dai.ngo@oracle.com wrote:
> [... earlier quoting trimmed ...]
>
> So far I used the 'nc' tool to verify that rpcbind, statd and mountd
> no longer have this problem. I also verified that NFSv3 and NFSv4.x
> mounts continued to work while the tests were running. I don't know of
> any tests specifically designed for this, but I will look around to see
> if we can use some NFS tests that would exercise these services.

I ran the rpc, ti-rpc and ts-rpc tests of the LTP test suite with
and without the patch. The results are the same; there is no
regression.

-Dai
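[Alongside suite runs like LTP, a quick liveness probe is to call a service's NULL procedure while the connection flood is active. A sketch using the standard libtirpc client API follows; the hard-coded program number 100000 and version 4 are rpcbind's well-known values, and the program is an illustration, not part of this thread. It is roughly what rpcinfo does.]

/*
 * Simple RPC "ping": call the NULL procedure of rpcbind over TCP.
 * Exits 0 if the service answers within the timeout.
 */
#include <rpc/rpc.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    CLIENT *clnt;
    struct timeval tv = { 5, 0 };   /* 5-second call timeout */
    enum clnt_stat st;
    const char *host = argc > 1 ? argv[1] : "localhost";

    /* rpcbind: program 100000, version 4 */
    clnt = clnt_create(host, 100000, 4, "tcp");
    if (clnt == NULL) {
        clnt_pcreateerror(host);
        return 1;
    }
    st = clnt_call(clnt, NULLPROC,
        (xdrproc_t)xdr_void, NULL,
        (xdrproc_t)xdr_void, NULL, tv);
    if (st != RPC_SUCCESS) {
        clnt_perror(clnt, "NULL call failed");
        clnt_destroy(clnt);
        return 1;
    }
    printf("%s: rpcbind answers\n", host);
    clnt_destroy(clnt);
    return 0;
}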
On 8/10/21 8:08 PM, Dai Ngo wrote:
> [... quoted patch trimmed; see the full text above ...]
>
> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
> Signed-off-by: dai.ngo@oracle.com

Committed... (tag: libtirpc-1-3-3-rc1)

steved.
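[For readers skimming the committed __svc_destroy_idle(): its core is one pass over the fd table that either reaps every connection idle longer than timeout seconds, or, when timeout is 0, remembers and destroys the single least recently active one. Below is a self-contained sketch of just that selection logic; the conn array is a toy stand-in for libtirpc's per-fd SVCXPRT table, not the committed code itself.]

#define _DEFAULT_SOURCE     /* for the timersub/timercmp macros */
#include <stdio.h>
#include <sys/time.h>

struct conn {
    int fd;                         /* -1 means unused slot */
    struct timeval last_recv_time;  /* time of last received data */
};

static int destroy_idle(struct conn *tbl, int n, int timeout)
{
    struct timeval now, tdiff, tmax = { 0, 0 };
    struct conn *least_active = NULL;
    int i, ncleaned = 0;

    gettimeofday(&now, NULL);
    for (i = 0; i < n; i++) {
        if (tbl[i].fd == -1)
            continue;
        if (timeout == 0) {
            /* track the largest idle interval seen so far */
            timersub(&now, &tbl[i].last_recv_time, &tdiff);
            if (timercmp(&tdiff, &tmax, >)) {
                tmax = tdiff;
                least_active = &tbl[i];
            }
            continue;
        }
        if (now.tv_sec - tbl[i].last_recv_time.tv_sec > timeout) {
            tbl[i].fd = -1;         /* "destroy" the idle connection */
            ncleaned++;
        }
    }
    if (timeout == 0 && least_active != NULL) {
        least_active->fd = -1;      /* fall back: reap the least active */
        ncleaned++;
    }
    return ncleaned;
}

int main(void)
{
    struct conn tbl[2];
    struct timeval now;

    gettimeofday(&now, NULL);
    tbl[0].fd = 3;
    tbl[0].last_recv_time = now;            /* just active */
    tbl[1].fd = 4;
    tbl[1].last_recv_time = now;
    tbl[1].last_recv_time.tv_sec -= 30;     /* idle for 30 seconds */

    printf("reaped %d idle (>15s)\n", destroy_idle(tbl, 2, 15));
    printf("reaped %d least-active\n", destroy_idle(tbl, 2, 0));
    return 0;
}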