diff mbox series

[v2,1/1] Fix DoS vulnerability in libtirpc

Message ID 20210811000818.95985-1-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/1] Fix DoS vulnerability in libtirpc | expand

Commit Message

Dai Ngo Aug. 11, 2021, 12:08 a.m. UTC
Currently svc_run does not handle poll timeout and rendezvous_request
does not handle EMFILE error returned from accept(2 as it used to.
These two missing functionality were removed by commit b2c9430f46c4.

The effect of not handling poll timeout allows idle TCP conections
to remain ESTABLISHED indefinitely. When the number of connections
reaches the limit of the open file descriptors (ulimit -n) then
accept(2) fails with EMFILE. Since there is no handling of EMFILE
error this causes svc_run() to get in a tight loop calling accept(2).
This resulting in the RPC service of svc_run is being down, it's
no longer able to service any requests.

if [ $# -ne 3 ]; then
        echo "$0: server dst_port conn_cnt"
        exit
fi
server=$1
dport=$2
conn_cnt=$3
echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"

pcnt=0
while [ $pcnt -lt $conn_cnt ]
do
        nc -v --recv-only $server $dport &
        pcnt=`expr $pcnt + 1`
done

RPC service rpcbind, statd and mountd are effected by this
problem.

Fix by enhancing rendezvous_request to keep the number of
SVCXPRT conections to 4/5 of the size of the file descriptor
table. When this thresold is reached, it destroys the idle
TCP connections or destroys the least active connection if
no idle connnction was found.

Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
Signed-off-by: dai.ngo@oracle.com
---
 src/svc.c    | 17 +++++++++++++-
 src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 2 deletions(-)

Comments

Chuck Lever Aug. 11, 2021, 7:43 p.m. UTC | #1
> On Aug 10, 2021, at 8:08 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> Currently svc_run does not handle poll timeout and rendezvous_request
> does not handle EMFILE error returned from accept(2 as it used to.
> These two missing functionality were removed by commit b2c9430f46c4.
> 
> The effect of not handling poll timeout allows idle TCP conections
> to remain ESTABLISHED indefinitely. When the number of connections
> reaches the limit of the open file descriptors (ulimit -n) then
> accept(2) fails with EMFILE. Since there is no handling of EMFILE
> error this causes svc_run() to get in a tight loop calling accept(2).
> This resulting in the RPC service of svc_run is being down, it's
> no longer able to service any requests.
> 
> if [ $# -ne 3 ]; then
>        echo "$0: server dst_port conn_cnt"
>        exit
> fi
> server=$1
> dport=$2
> conn_cnt=$3
> echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"
> 
> pcnt=0
> while [ $pcnt -lt $conn_cnt ]
> do
>        nc -v --recv-only $server $dport &
>        pcnt=`expr $pcnt + 1`
> done
> 
> RPC service rpcbind, statd and mountd are effected by this
> problem.
> 
> Fix by enhancing rendezvous_request to keep the number of
> SVCXPRT conections to 4/5 of the size of the file descriptor
> table. When this thresold is reached, it destroys the idle
> TCP connections or destroys the least active connection if
> no idle connnction was found.
> 
> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
> Signed-off-by: dai.ngo@oracle.com

Thanks, Dai, this version makes me feel a lot better.

I didn't look too closely at the new __svc_destroy_idle()
function. I know you based it on __svc_clean_idle(), but
I wonder if we have any regression tests in this area.


> ---
> src/svc.c    | 17 +++++++++++++-
> src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/src/svc.c b/src/svc.c
> index 6db164bbd76b..3a8709fe375c 100644
> --- a/src/svc.c
> +++ b/src/svc.c
> @@ -57,7 +57,7 @@
> 
> #define max(a, b) (a > b ? a : b)
> 
> -static SVCXPRT **__svc_xports;
> +SVCXPRT **__svc_xports;
> int __svc_maxrec;
> 
> /*
> @@ -194,6 +194,21 @@ __xprt_do_unregister (xprt, dolock)
>     rwlock_unlock (&svc_fd_lock);
> }
> 
> +int
> +svc_open_fds()
> +{
> +	int ix;
> +	int nfds = 0;
> +
> +	rwlock_rdlock (&svc_fd_lock);
> +	for (ix = 0; ix < svc_max_pollfd; ++ix) {
> +		if (svc_pollfd[ix].fd != -1)
> +			nfds++;
> +	}
> +	rwlock_unlock (&svc_fd_lock);
> +	return (nfds);
> +}
> +
> /*
>  * Add a service program to the callout list.
>  * The dispatch routine will be called when a rpc request for this
> diff --git a/src/svc_vc.c b/src/svc_vc.c
> index f1d9f001fcdc..3dc8a75787e1 100644
> --- a/src/svc_vc.c
> +++ b/src/svc_vc.c
> @@ -64,6 +64,8 @@
> 
> 
> extern rwlock_t svc_fd_lock;
> +extern SVCXPRT **__svc_xports;
> +extern int svc_open_fds();
> 
> static SVCXPRT *makefd_xprt(int, u_int, u_int);
> static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
> @@ -82,6 +84,7 @@ static void svc_vc_ops(SVCXPRT *);
> static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
> static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
> 				   	     void *in);
> +static int __svc_destroy_idle(int timeout);
> 
> struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
> 	u_int sendsize;
> @@ -313,13 +316,14 @@ done:
> 	return (xprt);
> }
> 
> +
> /*ARGSUSED*/
> static bool_t
> rendezvous_request(xprt, msg)
> 	SVCXPRT *xprt;
> 	struct rpc_msg *msg;
> {
> -	int sock, flags;
> +	int sock, flags, nfds, cnt;
> 	struct cf_rendezvous *r;
> 	struct cf_conn *cd;
> 	struct sockaddr_storage addr;
> @@ -379,6 +383,16 @@ again:
> 
> 	gettimeofday(&cd->last_recv_time, NULL);
> 
> +	nfds = svc_open_fds();
> +	if (nfds >= (_rpc_dtablesize() / 5) * 4) {
> +		/* destroy idle connections */
> +		cnt = __svc_destroy_idle(15);
> +		if (cnt == 0) {
> +			/* destroy least active */
> +			__svc_destroy_idle(0);
> +		}
> +	}
> +
> 	return (FALSE); /* there is never an rpc msg to be processed */
> }
> 
> @@ -820,3 +834,49 @@ __svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
> {
> 	return FALSE;
> }
> +
> +static int
> +__svc_destroy_idle(int timeout)
> +{
> +	int i, ncleaned = 0;
> +	SVCXPRT *xprt, *least_active;
> +	struct timeval tv, tdiff, tmax;
> +	struct cf_conn *cd;
> +
> +	gettimeofday(&tv, NULL);
> +	tmax.tv_sec = tmax.tv_usec = 0;
> +	least_active = NULL;
> +	rwlock_wrlock(&svc_fd_lock);
> +
> +	for (i = 0; i <= svc_max_pollfd; i++) {
> +		if (svc_pollfd[i].fd == -1)
> +			continue;
> +		xprt = __svc_xports[i];
> +		if (xprt == NULL || xprt->xp_ops == NULL ||
> +			xprt->xp_ops->xp_recv != svc_vc_recv)
> +			continue;
> +		cd = (struct cf_conn *)xprt->xp_p1;
> +		if (!cd->nonblock)
> +			continue;
> +		if (timeout == 0) {
> +			timersub(&tv, &cd->last_recv_time, &tdiff);
> +			if (timercmp(&tdiff, &tmax, >)) {
> +				tmax = tdiff;
> +				least_active = xprt;
> +			}
> +			continue;
> +		}
> +		if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
> +			__xprt_unregister_unlocked(xprt);
> +			__svc_vc_dodestroy(xprt);
> +			ncleaned++;
> +		}
> +	}
> +	if (timeout == 0 && least_active != NULL) {
> +		__xprt_unregister_unlocked(least_active);
> +		__svc_vc_dodestroy(least_active);
> +		ncleaned++;
> +	}
> +	rwlock_unlock(&svc_fd_lock);
> +	return (ncleaned);
> +}
> -- 
> 2.26.2
> 

--
Chuck Lever
Dai Ngo Aug. 11, 2021, 9:16 p.m. UTC | #2
On 8/11/21 12:43 PM, Chuck Lever III wrote:
>
>> On Aug 10, 2021, at 8:08 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>> Currently svc_run does not handle poll timeout and rendezvous_request
>> does not handle EMFILE error returned from accept(2 as it used to.
>> These two missing functionality were removed by commit b2c9430f46c4.
>>
>> The effect of not handling poll timeout allows idle TCP conections
>> to remain ESTABLISHED indefinitely. When the number of connections
>> reaches the limit of the open file descriptors (ulimit -n) then
>> accept(2) fails with EMFILE. Since there is no handling of EMFILE
>> error this causes svc_run() to get in a tight loop calling accept(2).
>> This resulting in the RPC service of svc_run is being down, it's
>> no longer able to service any requests.
>>
>> if [ $# -ne 3 ]; then
>>         echo "$0: server dst_port conn_cnt"
>>         exit
>> fi
>> server=$1
>> dport=$2
>> conn_cnt=$3
>> echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"
>>
>> pcnt=0
>> while [ $pcnt -lt $conn_cnt ]
>> do
>>         nc -v --recv-only $server $dport &
>>         pcnt=`expr $pcnt + 1`
>> done
>>
>> RPC service rpcbind, statd and mountd are effected by this
>> problem.
>>
>> Fix by enhancing rendezvous_request to keep the number of
>> SVCXPRT conections to 4/5 of the size of the file descriptor
>> table. When this thresold is reached, it destroys the idle
>> TCP connections or destroys the least active connection if
>> no idle connnction was found.
>>
>> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
>> Signed-off-by: dai.ngo@oracle.com
> Thanks, Dai, this version makes me feel a lot better.
>
> I didn't look too closely at the new __svc_destroy_idle()
> function. I know you based it on __svc_clean_idle(), but
> I wonder if we have any regression tests in this area.

Thank you Chuck for your review.

So far I used the 'nc' tool to verify rpcbind, statd and mountd
no longer have this problem. I also verified that NFSv3 and NFSv4.x
mount also work while the tests were running. I don't know of any
tests specifically designed for this but I will look around to see
if we can use some NFS tests that would exercise these services.

-Dai
  

>
>
>> ---
>> src/svc.c    | 17 +++++++++++++-
>> src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 77 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/svc.c b/src/svc.c
>> index 6db164bbd76b..3a8709fe375c 100644
>> --- a/src/svc.c
>> +++ b/src/svc.c
>> @@ -57,7 +57,7 @@
>>
>> #define max(a, b) (a > b ? a : b)
>>
>> -static SVCXPRT **__svc_xports;
>> +SVCXPRT **__svc_xports;
>> int __svc_maxrec;
>>
>> /*
>> @@ -194,6 +194,21 @@ __xprt_do_unregister (xprt, dolock)
>>      rwlock_unlock (&svc_fd_lock);
>> }
>>
>> +int
>> +svc_open_fds()
>> +{
>> +	int ix;
>> +	int nfds = 0;
>> +
>> +	rwlock_rdlock (&svc_fd_lock);
>> +	for (ix = 0; ix < svc_max_pollfd; ++ix) {
>> +		if (svc_pollfd[ix].fd != -1)
>> +			nfds++;
>> +	}
>> +	rwlock_unlock (&svc_fd_lock);
>> +	return (nfds);
>> +}
>> +
>> /*
>>   * Add a service program to the callout list.
>>   * The dispatch routine will be called when a rpc request for this
>> diff --git a/src/svc_vc.c b/src/svc_vc.c
>> index f1d9f001fcdc..3dc8a75787e1 100644
>> --- a/src/svc_vc.c
>> +++ b/src/svc_vc.c
>> @@ -64,6 +64,8 @@
>>
>>
>> extern rwlock_t svc_fd_lock;
>> +extern SVCXPRT **__svc_xports;
>> +extern int svc_open_fds();
>>
>> static SVCXPRT *makefd_xprt(int, u_int, u_int);
>> static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
>> @@ -82,6 +84,7 @@ static void svc_vc_ops(SVCXPRT *);
>> static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
>> static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
>> 				   	     void *in);
>> +static int __svc_destroy_idle(int timeout);
>>
>> struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
>> 	u_int sendsize;
>> @@ -313,13 +316,14 @@ done:
>> 	return (xprt);
>> }
>>
>> +
>> /*ARGSUSED*/
>> static bool_t
>> rendezvous_request(xprt, msg)
>> 	SVCXPRT *xprt;
>> 	struct rpc_msg *msg;
>> {
>> -	int sock, flags;
>> +	int sock, flags, nfds, cnt;
>> 	struct cf_rendezvous *r;
>> 	struct cf_conn *cd;
>> 	struct sockaddr_storage addr;
>> @@ -379,6 +383,16 @@ again:
>>
>> 	gettimeofday(&cd->last_recv_time, NULL);
>>
>> +	nfds = svc_open_fds();
>> +	if (nfds >= (_rpc_dtablesize() / 5) * 4) {
>> +		/* destroy idle connections */
>> +		cnt = __svc_destroy_idle(15);
>> +		if (cnt == 0) {
>> +			/* destroy least active */
>> +			__svc_destroy_idle(0);
>> +		}
>> +	}
>> +
>> 	return (FALSE); /* there is never an rpc msg to be processed */
>> }
>>
>> @@ -820,3 +834,49 @@ __svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
>> {
>> 	return FALSE;
>> }
>> +
>> +static int
>> +__svc_destroy_idle(int timeout)
>> +{
>> +	int i, ncleaned = 0;
>> +	SVCXPRT *xprt, *least_active;
>> +	struct timeval tv, tdiff, tmax;
>> +	struct cf_conn *cd;
>> +
>> +	gettimeofday(&tv, NULL);
>> +	tmax.tv_sec = tmax.tv_usec = 0;
>> +	least_active = NULL;
>> +	rwlock_wrlock(&svc_fd_lock);
>> +
>> +	for (i = 0; i <= svc_max_pollfd; i++) {
>> +		if (svc_pollfd[i].fd == -1)
>> +			continue;
>> +		xprt = __svc_xports[i];
>> +		if (xprt == NULL || xprt->xp_ops == NULL ||
>> +			xprt->xp_ops->xp_recv != svc_vc_recv)
>> +			continue;
>> +		cd = (struct cf_conn *)xprt->xp_p1;
>> +		if (!cd->nonblock)
>> +			continue;
>> +		if (timeout == 0) {
>> +			timersub(&tv, &cd->last_recv_time, &tdiff);
>> +			if (timercmp(&tdiff, &tmax, >)) {
>> +				tmax = tdiff;
>> +				least_active = xprt;
>> +			}
>> +			continue;
>> +		}
>> +		if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
>> +			__xprt_unregister_unlocked(xprt);
>> +			__svc_vc_dodestroy(xprt);
>> +			ncleaned++;
>> +		}
>> +	}
>> +	if (timeout == 0 && least_active != NULL) {
>> +		__xprt_unregister_unlocked(least_active);
>> +		__svc_vc_dodestroy(least_active);
>> +		ncleaned++;
>> +	}
>> +	rwlock_unlock(&svc_fd_lock);
>> +	return (ncleaned);
>> +}
>> -- 
>> 2.26.2
>>
> --
> Chuck Lever
>
>
>
Dai Ngo Aug. 16, 2021, 4:25 p.m. UTC | #3
Hi Chuck,

On 8/11/21 2:16 PM, dai.ngo@oracle.com wrote:
>
> On 8/11/21 12:43 PM, Chuck Lever III wrote:
>>
>>> On Aug 10, 2021, at 8:08 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>>
>>> Currently svc_run does not handle poll timeout and rendezvous_request
>>> does not handle EMFILE error returned from accept(2 as it used to.
>>> These two missing functionality were removed by commit b2c9430f46c4.
>>>
>>> The effect of not handling poll timeout allows idle TCP conections
>>> to remain ESTABLISHED indefinitely. When the number of connections
>>> reaches the limit of the open file descriptors (ulimit -n) then
>>> accept(2) fails with EMFILE. Since there is no handling of EMFILE
>>> error this causes svc_run() to get in a tight loop calling accept(2).
>>> This resulting in the RPC service of svc_run is being down, it's
>>> no longer able to service any requests.
>>>
>>> if [ $# -ne 3 ]; then
>>>         echo "$0: server dst_port conn_cnt"
>>>         exit
>>> fi
>>> server=$1
>>> dport=$2
>>> conn_cnt=$3
>>> echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"
>>>
>>> pcnt=0
>>> while [ $pcnt -lt $conn_cnt ]
>>> do
>>>         nc -v --recv-only $server $dport &
>>>         pcnt=`expr $pcnt + 1`
>>> done
>>>
>>> RPC service rpcbind, statd and mountd are effected by this
>>> problem.
>>>
>>> Fix by enhancing rendezvous_request to keep the number of
>>> SVCXPRT conections to 4/5 of the size of the file descriptor
>>> table. When this thresold is reached, it destroys the idle
>>> TCP connections or destroys the least active connection if
>>> no idle connnction was found.
>>>
>>> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of 
>>> libtirpc
>>> Signed-off-by: dai.ngo@oracle.com
>> Thanks, Dai, this version makes me feel a lot better.
>>
>> I didn't look too closely at the new __svc_destroy_idle()
>> function. I know you based it on __svc_clean_idle(), but
>> I wonder if we have any regression tests in this area.
>
> Thank you Chuck for your review.
>
> So far I used the 'nc' tool to verify rpcbind, statd and mountd
> no longer have this problem. I also verified that NFSv3 and NFSv4.x
> mount also work while the tests were running. I don't know of any
> tests specifically designed for this but I will look around to see
> if we can use some NFS tests that would exercise these services.

I ran the rpc, ti-rpc and ts-rpc tests of the LTP testsuite with and
without the patch. The results are the same, there is no regression.

-Dai

>
> -Dai
>
>
>>
>>
>>> ---
>>> src/svc.c    | 17 +++++++++++++-
>>> src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>>> 2 files changed, 77 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/svc.c b/src/svc.c
>>> index 6db164bbd76b..3a8709fe375c 100644
>>> --- a/src/svc.c
>>> +++ b/src/svc.c
>>> @@ -57,7 +57,7 @@
>>>
>>> #define max(a, b) (a > b ? a : b)
>>>
>>> -static SVCXPRT **__svc_xports;
>>> +SVCXPRT **__svc_xports;
>>> int __svc_maxrec;
>>>
>>> /*
>>> @@ -194,6 +194,21 @@ __xprt_do_unregister (xprt, dolock)
>>>      rwlock_unlock (&svc_fd_lock);
>>> }
>>>
>>> +int
>>> +svc_open_fds()
>>> +{
>>> +    int ix;
>>> +    int nfds = 0;
>>> +
>>> +    rwlock_rdlock (&svc_fd_lock);
>>> +    for (ix = 0; ix < svc_max_pollfd; ++ix) {
>>> +        if (svc_pollfd[ix].fd != -1)
>>> +            nfds++;
>>> +    }
>>> +    rwlock_unlock (&svc_fd_lock);
>>> +    return (nfds);
>>> +}
>>> +
>>> /*
>>>   * Add a service program to the callout list.
>>>   * The dispatch routine will be called when a rpc request for this
>>> diff --git a/src/svc_vc.c b/src/svc_vc.c
>>> index f1d9f001fcdc..3dc8a75787e1 100644
>>> --- a/src/svc_vc.c
>>> +++ b/src/svc_vc.c
>>> @@ -64,6 +64,8 @@
>>>
>>>
>>> extern rwlock_t svc_fd_lock;
>>> +extern SVCXPRT **__svc_xports;
>>> +extern int svc_open_fds();
>>>
>>> static SVCXPRT *makefd_xprt(int, u_int, u_int);
>>> static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
>>> @@ -82,6 +84,7 @@ static void svc_vc_ops(SVCXPRT *);
>>> static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
>>> static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
>>>                             void *in);
>>> +static int __svc_destroy_idle(int timeout);
>>>
>>> struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
>>>     u_int sendsize;
>>> @@ -313,13 +316,14 @@ done:
>>>     return (xprt);
>>> }
>>>
>>> +
>>> /*ARGSUSED*/
>>> static bool_t
>>> rendezvous_request(xprt, msg)
>>>     SVCXPRT *xprt;
>>>     struct rpc_msg *msg;
>>> {
>>> -    int sock, flags;
>>> +    int sock, flags, nfds, cnt;
>>>     struct cf_rendezvous *r;
>>>     struct cf_conn *cd;
>>>     struct sockaddr_storage addr;
>>> @@ -379,6 +383,16 @@ again:
>>>
>>>     gettimeofday(&cd->last_recv_time, NULL);
>>>
>>> +    nfds = svc_open_fds();
>>> +    if (nfds >= (_rpc_dtablesize() / 5) * 4) {
>>> +        /* destroy idle connections */
>>> +        cnt = __svc_destroy_idle(15);
>>> +        if (cnt == 0) {
>>> +            /* destroy least active */
>>> +            __svc_destroy_idle(0);
>>> +        }
>>> +    }
>>> +
>>>     return (FALSE); /* there is never an rpc msg to be processed */
>>> }
>>>
>>> @@ -820,3 +834,49 @@ __svc_clean_idle(fd_set *fds, int timeout, 
>>> bool_t cleanblock)
>>> {
>>>     return FALSE;
>>> }
>>> +
>>> +static int
>>> +__svc_destroy_idle(int timeout)
>>> +{
>>> +    int i, ncleaned = 0;
>>> +    SVCXPRT *xprt, *least_active;
>>> +    struct timeval tv, tdiff, tmax;
>>> +    struct cf_conn *cd;
>>> +
>>> +    gettimeofday(&tv, NULL);
>>> +    tmax.tv_sec = tmax.tv_usec = 0;
>>> +    least_active = NULL;
>>> +    rwlock_wrlock(&svc_fd_lock);
>>> +
>>> +    for (i = 0; i <= svc_max_pollfd; i++) {
>>> +        if (svc_pollfd[i].fd == -1)
>>> +            continue;
>>> +        xprt = __svc_xports[i];
>>> +        if (xprt == NULL || xprt->xp_ops == NULL ||
>>> +            xprt->xp_ops->xp_recv != svc_vc_recv)
>>> +            continue;
>>> +        cd = (struct cf_conn *)xprt->xp_p1;
>>> +        if (!cd->nonblock)
>>> +            continue;
>>> +        if (timeout == 0) {
>>> +            timersub(&tv, &cd->last_recv_time, &tdiff);
>>> +            if (timercmp(&tdiff, &tmax, >)) {
>>> +                tmax = tdiff;
>>> +                least_active = xprt;
>>> +            }
>>> +            continue;
>>> +        }
>>> +        if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
>>> +            __xprt_unregister_unlocked(xprt);
>>> +            __svc_vc_dodestroy(xprt);
>>> +            ncleaned++;
>>> +        }
>>> +    }
>>> +    if (timeout == 0 && least_active != NULL) {
>>> +        __xprt_unregister_unlocked(least_active);
>>> +        __svc_vc_dodestroy(least_active);
>>> +        ncleaned++;
>>> +    }
>>> +    rwlock_unlock(&svc_fd_lock);
>>> +    return (ncleaned);
>>> +}
>>> -- 
>>> 2.26.2
>>>
>> -- 
>> Chuck Lever
>>
>>
>>
Steve Dickson Aug. 21, 2021, 5:21 p.m. UTC | #4
On 8/10/21 8:08 PM, Dai Ngo wrote:
> Currently svc_run does not handle poll timeout and rendezvous_request
> does not handle EMFILE error returned from accept(2 as it used to.
> These two missing functionality were removed by commit b2c9430f46c4.
> 
> The effect of not handling poll timeout allows idle TCP conections
> to remain ESTABLISHED indefinitely. When the number of connections
> reaches the limit of the open file descriptors (ulimit -n) then
> accept(2) fails with EMFILE. Since there is no handling of EMFILE
> error this causes svc_run() to get in a tight loop calling accept(2).
> This resulting in the RPC service of svc_run is being down, it's
> no longer able to service any requests.
> 
> if [ $# -ne 3 ]; then
>          echo "$0: server dst_port conn_cnt"
>          exit
> fi
> server=$1
> dport=$2
> conn_cnt=$3
> echo "dport[$dport] server[$server] conn_cnt[$conn_cnt]"
> 
> pcnt=0
> while [ $pcnt -lt $conn_cnt ]
> do
>          nc -v --recv-only $server $dport &
>          pcnt=`expr $pcnt + 1`
> done
> 
> RPC service rpcbind, statd and mountd are effected by this
> problem.
> 
> Fix by enhancing rendezvous_request to keep the number of
> SVCXPRT conections to 4/5 of the size of the file descriptor
> table. When this thresold is reached, it destroys the idle
> TCP connections or destroys the least active connection if
> no idle connnction was found.
> 
> Fixes: 44bf15b8 rpcbind: don't use obsolete svc_fdset interface of libtirpc
> Signed-off-by: dai.ngo@oracle.com
Committed... (tag: libtirpc-1-3-3-rc1)

steved.
> ---
>   src/svc.c    | 17 +++++++++++++-
>   src/svc_vc.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/src/svc.c b/src/svc.c
> index 6db164bbd76b..3a8709fe375c 100644
> --- a/src/svc.c
> +++ b/src/svc.c
> @@ -57,7 +57,7 @@
>   
>   #define max(a, b) (a > b ? a : b)
>   
> -static SVCXPRT **__svc_xports;
> +SVCXPRT **__svc_xports;
>   int __svc_maxrec;
>   
>   /*
> @@ -194,6 +194,21 @@ __xprt_do_unregister (xprt, dolock)
>       rwlock_unlock (&svc_fd_lock);
>   }
>   
> +int
> +svc_open_fds()
> +{
> +	int ix;
> +	int nfds = 0;
> +
> +	rwlock_rdlock (&svc_fd_lock);
> +	for (ix = 0; ix < svc_max_pollfd; ++ix) {
> +		if (svc_pollfd[ix].fd != -1)
> +			nfds++;
> +	}
> +	rwlock_unlock (&svc_fd_lock);
> +	return (nfds);
> +}
> +
>   /*
>    * Add a service program to the callout list.
>    * The dispatch routine will be called when a rpc request for this
> diff --git a/src/svc_vc.c b/src/svc_vc.c
> index f1d9f001fcdc..3dc8a75787e1 100644
> --- a/src/svc_vc.c
> +++ b/src/svc_vc.c
> @@ -64,6 +64,8 @@
>   
>   
>   extern rwlock_t svc_fd_lock;
> +extern SVCXPRT **__svc_xports;
> +extern int svc_open_fds();
>   
>   static SVCXPRT *makefd_xprt(int, u_int, u_int);
>   static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
> @@ -82,6 +84,7 @@ static void svc_vc_ops(SVCXPRT *);
>   static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
>   static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
>   				   	     void *in);
> +static int __svc_destroy_idle(int timeout);
>   
>   struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
>   	u_int sendsize;
> @@ -313,13 +316,14 @@ done:
>   	return (xprt);
>   }
>   
> +
>   /*ARGSUSED*/
>   static bool_t
>   rendezvous_request(xprt, msg)
>   	SVCXPRT *xprt;
>   	struct rpc_msg *msg;
>   {
> -	int sock, flags;
> +	int sock, flags, nfds, cnt;
>   	struct cf_rendezvous *r;
>   	struct cf_conn *cd;
>   	struct sockaddr_storage addr;
> @@ -379,6 +383,16 @@ again:
>   
>   	gettimeofday(&cd->last_recv_time, NULL);
>   
> +	nfds = svc_open_fds();
> +	if (nfds >= (_rpc_dtablesize() / 5) * 4) {
> +		/* destroy idle connections */
> +		cnt = __svc_destroy_idle(15);
> +		if (cnt == 0) {
> +			/* destroy least active */
> +			__svc_destroy_idle(0);
> +		}
> +	}
> +
>   	return (FALSE); /* there is never an rpc msg to be processed */
>   }
>   
> @@ -820,3 +834,49 @@ __svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
>   {
>   	return FALSE;
>   }
> +
> +static int
> +__svc_destroy_idle(int timeout)
> +{
> +	int i, ncleaned = 0;
> +	SVCXPRT *xprt, *least_active;
> +	struct timeval tv, tdiff, tmax;
> +	struct cf_conn *cd;
> +
> +	gettimeofday(&tv, NULL);
> +	tmax.tv_sec = tmax.tv_usec = 0;
> +	least_active = NULL;
> +	rwlock_wrlock(&svc_fd_lock);
> +
> +	for (i = 0; i <= svc_max_pollfd; i++) {
> +		if (svc_pollfd[i].fd == -1)
> +			continue;
> +		xprt = __svc_xports[i];
> +		if (xprt == NULL || xprt->xp_ops == NULL ||
> +			xprt->xp_ops->xp_recv != svc_vc_recv)
> +			continue;
> +		cd = (struct cf_conn *)xprt->xp_p1;
> +		if (!cd->nonblock)
> +			continue;
> +		if (timeout == 0) {
> +			timersub(&tv, &cd->last_recv_time, &tdiff);
> +			if (timercmp(&tdiff, &tmax, >)) {
> +				tmax = tdiff;
> +				least_active = xprt;
> +			}
> +			continue;
> +		}
> +		if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
> +			__xprt_unregister_unlocked(xprt);
> +			__svc_vc_dodestroy(xprt);
> +			ncleaned++;
> +		}
> +	}
> +	if (timeout == 0 && least_active != NULL) {
> +		__xprt_unregister_unlocked(least_active);
> +		__svc_vc_dodestroy(least_active);
> +		ncleaned++;
> +	}
> +	rwlock_unlock(&svc_fd_lock);
> +	return (ncleaned);
> +}
>
diff mbox series

Patch

diff --git a/src/svc.c b/src/svc.c
index 6db164bbd76b..3a8709fe375c 100644
--- a/src/svc.c
+++ b/src/svc.c
@@ -57,7 +57,7 @@ 
 
 #define max(a, b) (a > b ? a : b)
 
-static SVCXPRT **__svc_xports;
+SVCXPRT **__svc_xports;
 int __svc_maxrec;
 
 /*
@@ -194,6 +194,21 @@  __xprt_do_unregister (xprt, dolock)
     rwlock_unlock (&svc_fd_lock);
 }
 
+int
+svc_open_fds()
+{
+	int ix;
+	int nfds = 0;
+
+	rwlock_rdlock (&svc_fd_lock);
+	for (ix = 0; ix < svc_max_pollfd; ++ix) {
+		if (svc_pollfd[ix].fd != -1)
+			nfds++;
+	}
+	rwlock_unlock (&svc_fd_lock);
+	return (nfds);
+}
+
 /*
  * Add a service program to the callout list.
  * The dispatch routine will be called when a rpc request for this
diff --git a/src/svc_vc.c b/src/svc_vc.c
index f1d9f001fcdc..3dc8a75787e1 100644
--- a/src/svc_vc.c
+++ b/src/svc_vc.c
@@ -64,6 +64,8 @@ 
 
 
 extern rwlock_t svc_fd_lock;
+extern SVCXPRT **__svc_xports;
+extern int svc_open_fds();
 
 static SVCXPRT *makefd_xprt(int, u_int, u_int);
 static bool_t rendezvous_request(SVCXPRT *, struct rpc_msg *);
@@ -82,6 +84,7 @@  static void svc_vc_ops(SVCXPRT *);
 static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
 static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
 				   	     void *in);
+static int __svc_destroy_idle(int timeout);
 
 struct cf_rendezvous { /* kept in xprt->xp_p1 for rendezvouser */
 	u_int sendsize;
@@ -313,13 +316,14 @@  done:
 	return (xprt);
 }
 
+
 /*ARGSUSED*/
 static bool_t
 rendezvous_request(xprt, msg)
 	SVCXPRT *xprt;
 	struct rpc_msg *msg;
 {
-	int sock, flags;
+	int sock, flags, nfds, cnt;
 	struct cf_rendezvous *r;
 	struct cf_conn *cd;
 	struct sockaddr_storage addr;
@@ -379,6 +383,16 @@  again:
 
 	gettimeofday(&cd->last_recv_time, NULL);
 
+	nfds = svc_open_fds();
+	if (nfds >= (_rpc_dtablesize() / 5) * 4) {
+		/* destroy idle connections */
+		cnt = __svc_destroy_idle(15);
+		if (cnt == 0) {
+			/* destroy least active */
+			__svc_destroy_idle(0);
+		}
+	}
+
 	return (FALSE); /* there is never an rpc msg to be processed */
 }
 
@@ -820,3 +834,49 @@  __svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
 {
 	return FALSE;
 }
+
+static int
+__svc_destroy_idle(int timeout)
+{
+	int i, ncleaned = 0;
+	SVCXPRT *xprt, *least_active;
+	struct timeval tv, tdiff, tmax;
+	struct cf_conn *cd;
+
+	gettimeofday(&tv, NULL);
+	tmax.tv_sec = tmax.tv_usec = 0;
+	least_active = NULL;
+	rwlock_wrlock(&svc_fd_lock);
+
+	for (i = 0; i <= svc_max_pollfd; i++) {
+		if (svc_pollfd[i].fd == -1)
+			continue;
+		xprt = __svc_xports[i];
+		if (xprt == NULL || xprt->xp_ops == NULL ||
+			xprt->xp_ops->xp_recv != svc_vc_recv)
+			continue;
+		cd = (struct cf_conn *)xprt->xp_p1;
+		if (!cd->nonblock)
+			continue;
+		if (timeout == 0) {
+			timersub(&tv, &cd->last_recv_time, &tdiff);
+			if (timercmp(&tdiff, &tmax, >)) {
+				tmax = tdiff;
+				least_active = xprt;
+			}
+			continue;
+		}
+		if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
+			__xprt_unregister_unlocked(xprt);
+			__svc_vc_dodestroy(xprt);
+			ncleaned++;
+		}
+	}
+	if (timeout == 0 && least_active != NULL) {
+		__xprt_unregister_unlocked(least_active);
+		__svc_vc_dodestroy(least_active);
+		ncleaned++;
+	}
+	rwlock_unlock(&svc_fd_lock);
+	return (ncleaned);
+}