diff mbox series

[1/1] NFSD: Fix memory shortage problem with Courteous server.

Message ID 1655921718-28842-1-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series [1/1] NFSD: Fix memory shortage problem with Courteous server. | expand

Commit Message

Dai Ngo June 22, 2022, 6:15 p.m. UTC
Currently the idle timeout for courtesy clients is fixed at 1 day.
If lots of courtesy clients remain in the system, they can cause a
memory shortage that affects the operation of other modules in the
kernel. This problem can be observed by running the pynfs nfs4.0
CID5 test in a loop. Eventually the system runs out of memory and
rpc.gssd fails to add a new watch:

rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
		No space left on device

and alloc_inode also fails with an out-of-memory error:

Call Trace:
<TASK>
        dump_stack_lvl+0x33/0x42
        dump_header+0x4a/0x1ed
        oom_kill_process+0x80/0x10d
        out_of_memory+0x237/0x25f
        __alloc_pages_slowpath.constprop.0+0x617/0x7b6
        __alloc_pages+0x132/0x1e3
        alloc_slab_page+0x15/0x33
        allocate_slab+0x78/0x1ab
        ? alloc_inode+0x38/0x8d
        ___slab_alloc+0x2af/0x373
        ? alloc_inode+0x38/0x8d
        ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
        ? alloc_inode+0x38/0x8d
        __slab_alloc.constprop.0+0x1c/0x24
        kmem_cache_alloc_lru+0x8c/0x142
        alloc_inode+0x38/0x8d
        iget_locked+0x60/0x126
        kernfs_get_inode+0x18/0x105
        kernfs_iop_lookup+0x6d/0xbc
        __lookup_slow+0xb7/0xf9
        lookup_slow+0x3a/0x52
        walk_component+0x90/0x100
        ? inode_permission+0x87/0x128
        link_path_walk.part.0.constprop.0+0x266/0x2ea
        ? path_init+0x101/0x2f2
        path_lookupat+0x4c/0xfa
        filename_lookup+0x63/0xd7
        ? getname_flags+0x32/0x17a
        ? kmem_cache_alloc+0x11f/0x144
        ? getname_flags+0x16c/0x17a
        user_path_at_empty+0x37/0x4b
        do_readlinkat+0x61/0x102
        __x64_sys_readlinkat+0x18/0x1b
        do_syscall_64+0x57/0x72
        entry_SYSCALL_64_after_hwframe+0x46/0xb0
        RIP: 0033:0x7fce5410340e

This patch adds a simple policy to dynamically adjust the idle
timeout based on the percentage of available memory in the system,
as follows:

. > 70%    : unlimited. Courtesy clients are allowed to remain valid
             as long as memory availability is above 70%
. 60% - 70%:  1 day.
. 50% - 60%:  1hr
. 40% - 50%:  30mins
. 30% - 40%:  15mins
. < 30%:      disabled. Expire all existing courtesy clients and do
              not allow new courtesy clients

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c | 41 +++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/nfsd.h      |  5 ++++-
 2 files changed, 43 insertions(+), 3 deletions(-)

Comments

Chuck Lever June 22, 2022, 6:16 p.m. UTC | #1
> On Jun 22, 2022, at 2:15 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> Currently the idle timeout for courtesy client is fixed at 1 day.
> If there are lots of courtesy clients remain in the system it can
> cause memory resource shortage that effects the operations of other
> modules in the kernel. This problem can be observed by running pynfs
> nfs4.0 CID5 test in a loop. Eventually system runs out of memory
> and rpc.gssd fails to add new watch:
> 
> rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
> 		No space left on device
> 
> and also alloc_inode fails with out of memory:
> 
> Call Trace:
> <TASK>
>        dump_stack_lvl+0x33/0x42
>        dump_header+0x4a/0x1ed
>        oom_kill_process+0x80/0x10d
>        out_of_memory+0x237/0x25f
>        __alloc_pages_slowpath.constprop.0+0x617/0x7b6
>        __alloc_pages+0x132/0x1e3
>        alloc_slab_page+0x15/0x33
>        allocate_slab+0x78/0x1ab
>        ? alloc_inode+0x38/0x8d
>        ___slab_alloc+0x2af/0x373
>        ? alloc_inode+0x38/0x8d
>        ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
>        ? alloc_inode+0x38/0x8d
>        __slab_alloc.constprop.0+0x1c/0x24
>        kmem_cache_alloc_lru+0x8c/0x142
>        alloc_inode+0x38/0x8d
>        iget_locked+0x60/0x126
>        kernfs_get_inode+0x18/0x105
>        kernfs_iop_lookup+0x6d/0xbc
>        __lookup_slow+0xb7/0xf9
>        lookup_slow+0x3a/0x52
>        walk_component+0x90/0x100
>        ? inode_permission+0x87/0x128
>        link_path_walk.part.0.constprop.0+0x266/0x2ea
>        ? path_init+0x101/0x2f2
>        path_lookupat+0x4c/0xfa
>        filename_lookup+0x63/0xd7
>        ? getname_flags+0x32/0x17a
>        ? kmem_cache_alloc+0x11f/0x144
>        ? getname_flags+0x16c/0x17a
>        user_path_at_empty+0x37/0x4b
>        do_readlinkat+0x61/0x102
>        __x64_sys_readlinkat+0x18/0x1b
>        do_syscall_64+0x57/0x72
>        entry_SYSCALL_64_after_hwframe+0x46/0xb0
>        RIP: 0033:0x7fce5410340e
> 
> This patch adds a simple policy to dynamically adjust the idle
> timeout based on the percentage of available memory in the system
> as follow:
> 
> . > 70%    : unlimited. Courtesy clients are allowed to remain valid
>             as long as memory availability is above 70%
> . 60% - 70%:  1 day.
> . 50% - 60%:  1hr
> . 40% - 50%:  30mins
> . 30% - 40%:  15mins
> . < 30%:      disable. Expire all existing courtesy clients and donot
>              allow new courtesey client

I thought our plan was to add a shrinker to do this.


> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/nfs4state.c | 41 +++++++++++++++++++++++++++++++++++++++--
> fs/nfsd/nfsd.h      |  5 ++++-
> 2 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 9409a0dc1b76..a7feea9d07cf 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5788,12 +5788,47 @@ nfs4_anylock_blockers(struct nfs4_client *clp)
> 	return false;
> }
> 
> +static bool
> +nfs4_allow_courtesy_client(struct nfsd_net *nn, unsigned int *idle_timeout)
> +{
> +	unsigned long avail;
> +	bool ret = true;
> +	unsigned int courtesy_expire = 0;
> +	struct sysinfo si;
> +
> +	si_meminfo(&si);
> +	avail = (si.freeram * 10) / (si.totalram - si.totalhigh);
> +	switch (avail) {
> +	case 7: case 8: case 9: case 10:
> +		courtesy_expire = 0;		/* unlimit */
> +		break;
> +	case 6:
> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1DAY;
> +		break;
> +	case 5:
> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1HR;
> +		break;
> +	case 4:
> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_30MINS;
> +		break;
> +	case 3:
> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_15MINS;
> +		break;
> +	default:
> +		ret = false;			/* disallow CC */
> +	}
> +	*idle_timeout = courtesy_expire;
> +	return ret;
> +}
> +
> static void
> nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
> 				struct laundry_time *lt)
> {
> 	struct list_head *pos, *next;
> 	struct nfs4_client *clp;
> +	unsigned int exptime;
> +	bool allow_cc = nfs4_allow_courtesy_client(nn, &exptime);
> 
> 	INIT_LIST_HEAD(reaplist);
> 	spin_lock(&nn->client_lock);
> @@ -5803,11 +5838,13 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
> 			goto exp_client;
> 		if (!state_expired(lt, clp->cl_time))
> 			break;
> +		if (!allow_cc)
> +			goto exp_client;
> 		if (!atomic_read(&clp->cl_rpc_users))
> 			clp->cl_state = NFSD4_COURTESY;
> 		if (!client_has_state(clp) ||
> -				ktime_get_boottime_seconds() >=
> -				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
> +				(exptime && ktime_get_boottime_seconds() >=
> +				(clp->cl_time + exptime)))
> 			goto exp_client;
> 		if (nfs4_anylock_blockers(clp)) {
> exp_client:
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 847b482155ae..9d4a5708f852 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -340,7 +340,10 @@ void		nfsd_lockd_shutdown(void);
> #define COMPOUND_ERR_SLACK_SPACE	16     /* OP_SETATTR */
> 
> #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
> -#define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
> +#define	NFSD_COURTESY_CLIENT_TO_1DAY	(24 * 60 * 60)	/* seconds */
> +#define	NFSD_COURTESY_CLIENT_TO_1HR	(60 * 60)
> +#define	NFSD_COURTESY_CLIENT_TO_30MINS	(30 * 60)
> +#define	NFSD_COURTESY_CLIENT_TO_15MINS	(15 * 60)
> 
> /*
>  * The following attributes are currently not supported by the NFSv4 server:
> -- 
> 2.9.5
> 

--
Chuck Lever
Dai Ngo June 22, 2022, 6:28 p.m. UTC | #2
On 6/22/22 11:16 AM, Chuck Lever III wrote:
>
>> On Jun 22, 2022, at 2:15 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>> Currently the idle timeout for courtesy client is fixed at 1 day.
>> If there are lots of courtesy clients remain in the system it can
>> cause memory resource shortage that effects the operations of other
>> modules in the kernel. This problem can be observed by running pynfs
>> nfs4.0 CID5 test in a loop. Eventually system runs out of memory
>> and rpc.gssd fails to add new watch:
>>
>> rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
>> 		No space left on device
>>
>> and also alloc_inode fails with out of memory:
>>
>> Call Trace:
>> <TASK>
>>         dump_stack_lvl+0x33/0x42
>>         dump_header+0x4a/0x1ed
>>         oom_kill_process+0x80/0x10d
>>         out_of_memory+0x237/0x25f
>>         __alloc_pages_slowpath.constprop.0+0x617/0x7b6
>>         __alloc_pages+0x132/0x1e3
>>         alloc_slab_page+0x15/0x33
>>         allocate_slab+0x78/0x1ab
>>         ? alloc_inode+0x38/0x8d
>>         ___slab_alloc+0x2af/0x373
>>         ? alloc_inode+0x38/0x8d
>>         ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
>>         ? alloc_inode+0x38/0x8d
>>         __slab_alloc.constprop.0+0x1c/0x24
>>         kmem_cache_alloc_lru+0x8c/0x142
>>         alloc_inode+0x38/0x8d
>>         iget_locked+0x60/0x126
>>         kernfs_get_inode+0x18/0x105
>>         kernfs_iop_lookup+0x6d/0xbc
>>         __lookup_slow+0xb7/0xf9
>>         lookup_slow+0x3a/0x52
>>         walk_component+0x90/0x100
>>         ? inode_permission+0x87/0x128
>>         link_path_walk.part.0.constprop.0+0x266/0x2ea
>>         ? path_init+0x101/0x2f2
>>         path_lookupat+0x4c/0xfa
>>         filename_lookup+0x63/0xd7
>>         ? getname_flags+0x32/0x17a
>>         ? kmem_cache_alloc+0x11f/0x144
>>         ? getname_flags+0x16c/0x17a
>>         user_path_at_empty+0x37/0x4b
>>         do_readlinkat+0x61/0x102
>>         __x64_sys_readlinkat+0x18/0x1b
>>         do_syscall_64+0x57/0x72
>>         entry_SYSCALL_64_after_hwframe+0x46/0xb0
>>         RIP: 0033:0x7fce5410340e
>>
>> This patch adds a simple policy to dynamically adjust the idle
>> timeout based on the percentage of available memory in the system
>> as follow:
>>
>> . > 70%    : unlimited. Courtesy clients are allowed to remain valid
>>              as long as memory availability is above 70%
>> . 60% - 70%:  1 day.
>> . 50% - 60%:  1hr
>> . 40% - 50%:  30mins
>> . 30% - 40%:  15mins
>> . < 30%:      disable. Expire all existing courtesy clients and donot
>>               allow new courtesey client
> I thought our plan was to add a shrinker to do this.

I'm not familiar with the kernel's memory allocation and don't want to muck
with it, so I started with this simple approach, but I'm open to any suggestions
on how to add a shrinker for this task. Is there an existing model that I
can use as a reference?

Thanks,
-Dai

>
>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4state.c | 41 +++++++++++++++++++++++++++++++++++++++--
>> fs/nfsd/nfsd.h      |  5 ++++-
>> 2 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 9409a0dc1b76..a7feea9d07cf 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5788,12 +5788,47 @@ nfs4_anylock_blockers(struct nfs4_client *clp)
>> 	return false;
>> }
>>
>> +static bool
>> +nfs4_allow_courtesy_client(struct nfsd_net *nn, unsigned int *idle_timeout)
>> +{
>> +	unsigned long avail;
>> +	bool ret = true;
>> +	unsigned int courtesy_expire = 0;
>> +	struct sysinfo si;
>> +
>> +	si_meminfo(&si);
>> +	avail = (si.freeram * 10) / (si.totalram - si.totalhigh);
>> +	switch (avail) {
>> +	case 7: case 8: case 9: case 10:
>> +		courtesy_expire = 0;		/* unlimit */
>> +		break;
>> +	case 6:
>> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1DAY;
>> +		break;
>> +	case 5:
>> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1HR;
>> +		break;
>> +	case 4:
>> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_30MINS;
>> +		break;
>> +	case 3:
>> +		courtesy_expire = NFSD_COURTESY_CLIENT_TO_15MINS;
>> +		break;
>> +	default:
>> +		ret = false;			/* disallow CC */
>> +	}
>> +	*idle_timeout = courtesy_expire;
>> +	return ret;
>> +}
>> +
>> static void
>> nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
>> 				struct laundry_time *lt)
>> {
>> 	struct list_head *pos, *next;
>> 	struct nfs4_client *clp;
>> +	unsigned int exptime;
>> +	bool allow_cc = nfs4_allow_courtesy_client(nn, &exptime);
>>
>> 	INIT_LIST_HEAD(reaplist);
>> 	spin_lock(&nn->client_lock);
>> @@ -5803,11 +5838,13 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
>> 			goto exp_client;
>> 		if (!state_expired(lt, clp->cl_time))
>> 			break;
>> +		if (!allow_cc)
>> +			goto exp_client;
>> 		if (!atomic_read(&clp->cl_rpc_users))
>> 			clp->cl_state = NFSD4_COURTESY;
>> 		if (!client_has_state(clp) ||
>> -				ktime_get_boottime_seconds() >=
>> -				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
>> +				(exptime && ktime_get_boottime_seconds() >=
>> +				(clp->cl_time + exptime)))
>> 			goto exp_client;
>> 		if (nfs4_anylock_blockers(clp)) {
>> exp_client:
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 847b482155ae..9d4a5708f852 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -340,7 +340,10 @@ void		nfsd_lockd_shutdown(void);
>> #define COMPOUND_ERR_SLACK_SPACE	16     /* OP_SETATTR */
>>
>> #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
>> -#define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
>> +#define	NFSD_COURTESY_CLIENT_TO_1DAY	(24 * 60 * 60)	/* seconds */
>> +#define	NFSD_COURTESY_CLIENT_TO_1HR	(60 * 60)
>> +#define	NFSD_COURTESY_CLIENT_TO_30MINS	(30 * 60)
>> +#define	NFSD_COURTESY_CLIENT_TO_15MINS	(15 * 60)
>>
>> /*
>>   * The following attributes are currently not supported by the NFSv4 server:
>> -- 
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>
Chuck Lever June 22, 2022, 6:32 p.m. UTC | #3
> On Jun 22, 2022, at 2:28 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 6/22/22 11:16 AM, Chuck Lever III wrote:
>> 
>>> On Jun 22, 2022, at 2:15 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>> 
>>> Currently the idle timeout for courtesy client is fixed at 1 day.
>>> If there are lots of courtesy clients remain in the system it can
>>> cause memory resource shortage that effects the operations of other
>>> modules in the kernel. This problem can be observed by running pynfs
>>> nfs4.0 CID5 test in a loop. Eventually system runs out of memory
>>> and rpc.gssd fails to add new watch:
>>> 
>>> rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
>>> 		No space left on device
>>> 
>>> and also alloc_inode fails with out of memory:
>>> 
>>> Call Trace:
>>> <TASK>
>>> dump_stack_lvl+0x33/0x42
>>> dump_header+0x4a/0x1ed
>>> oom_kill_process+0x80/0x10d
>>> out_of_memory+0x237/0x25f
>>> __alloc_pages_slowpath.constprop.0+0x617/0x7b6
>>> __alloc_pages+0x132/0x1e3
>>> alloc_slab_page+0x15/0x33
>>> allocate_slab+0x78/0x1ab
>>> ? alloc_inode+0x38/0x8d
>>> ___slab_alloc+0x2af/0x373
>>> ? alloc_inode+0x38/0x8d
>>> ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
>>> ? alloc_inode+0x38/0x8d
>>> __slab_alloc.constprop.0+0x1c/0x24
>>> kmem_cache_alloc_lru+0x8c/0x142
>>> alloc_inode+0x38/0x8d
>>> iget_locked+0x60/0x126
>>> kernfs_get_inode+0x18/0x105
>>> kernfs_iop_lookup+0x6d/0xbc
>>> __lookup_slow+0xb7/0xf9
>>> lookup_slow+0x3a/0x52
>>> walk_component+0x90/0x100
>>> ? inode_permission+0x87/0x128
>>> link_path_walk.part.0.constprop.0+0x266/0x2ea
>>> ? path_init+0x101/0x2f2
>>> path_lookupat+0x4c/0xfa
>>> filename_lookup+0x63/0xd7
>>> ? getname_flags+0x32/0x17a
>>> ? kmem_cache_alloc+0x11f/0x144
>>> ? getname_flags+0x16c/0x17a
>>> user_path_at_empty+0x37/0x4b
>>> do_readlinkat+0x61/0x102
>>> __x64_sys_readlinkat+0x18/0x1b
>>> do_syscall_64+0x57/0x72
>>> entry_SYSCALL_64_after_hwframe+0x46/0xb0
>>> RIP: 0033:0x7fce5410340e
>>> 
>>> This patch adds a simple policy to dynamically adjust the idle
>>> timeout based on the percentage of available memory in the system
>>> as follow:
>>> 
>>> . > 70% : unlimited. Courtesy clients are allowed to remain valid
>>> as long as memory availability is above 70%
>>> . 60% - 70%: 1 day.
>>> . 50% - 60%: 1hr
>>> . 40% - 50%: 30mins
>>> . 30% - 40%: 15mins
>>> . < 30%: disable. Expire all existing courtesy clients and donot
>>> allow new courtesey client
>> I thought our plan was to add a shrinker to do this.
> 
> I'm not familiar with kernel's memory allocation and don't want to muck
> with it so I start with this simple approach but I'm open for any suggestion
> on how to add a shrinker for this task. Is there any existing model that I
> can use as reference?

Fortunately there's nothing complicated about using a shrinker.
Look for register_shrinker() calls to see code examples. There
are two already in NFSD itself.


--
Chuck Lever
Dai Ngo June 22, 2022, 6:35 p.m. UTC | #4
On 6/22/22 11:32 AM, Chuck Lever III wrote:
>
>> On Jun 22, 2022, at 2:28 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>
>>
>> On 6/22/22 11:16 AM, Chuck Lever III wrote:
>>>> On Jun 22, 2022, at 2:15 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
>>>>
>>>> Currently the idle timeout for courtesy client is fixed at 1 day.
>>>> If there are lots of courtesy clients remain in the system it can
>>>> cause memory resource shortage that effects the operations of other
>>>> modules in the kernel. This problem can be observed by running pynfs
>>>> nfs4.0 CID5 test in a loop. Eventually system runs out of memory
>>>> and rpc.gssd fails to add new watch:
>>>>
>>>> rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
>>>> 		No space left on device
>>>>
>>>> and also alloc_inode fails with out of memory:
>>>>
>>>> Call Trace:
>>>> <TASK>
>>>> dump_stack_lvl+0x33/0x42
>>>> dump_header+0x4a/0x1ed
>>>> oom_kill_process+0x80/0x10d
>>>> out_of_memory+0x237/0x25f
>>>> __alloc_pages_slowpath.constprop.0+0x617/0x7b6
>>>> __alloc_pages+0x132/0x1e3
>>>> alloc_slab_page+0x15/0x33
>>>> allocate_slab+0x78/0x1ab
>>>> ? alloc_inode+0x38/0x8d
>>>> ___slab_alloc+0x2af/0x373
>>>> ? alloc_inode+0x38/0x8d
>>>> ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
>>>> ? alloc_inode+0x38/0x8d
>>>> __slab_alloc.constprop.0+0x1c/0x24
>>>> kmem_cache_alloc_lru+0x8c/0x142
>>>> alloc_inode+0x38/0x8d
>>>> iget_locked+0x60/0x126
>>>> kernfs_get_inode+0x18/0x105
>>>> kernfs_iop_lookup+0x6d/0xbc
>>>> __lookup_slow+0xb7/0xf9
>>>> lookup_slow+0x3a/0x52
>>>> walk_component+0x90/0x100
>>>> ? inode_permission+0x87/0x128
>>>> link_path_walk.part.0.constprop.0+0x266/0x2ea
>>>> ? path_init+0x101/0x2f2
>>>> path_lookupat+0x4c/0xfa
>>>> filename_lookup+0x63/0xd7
>>>> ? getname_flags+0x32/0x17a
>>>> ? kmem_cache_alloc+0x11f/0x144
>>>> ? getname_flags+0x16c/0x17a
>>>> user_path_at_empty+0x37/0x4b
>>>> do_readlinkat+0x61/0x102
>>>> __x64_sys_readlinkat+0x18/0x1b
>>>> do_syscall_64+0x57/0x72
>>>> entry_SYSCALL_64_after_hwframe+0x46/0xb0
>>>> RIP: 0033:0x7fce5410340e
>>>>
>>>> This patch adds a simple policy to dynamically adjust the idle
>>>> timeout based on the percentage of available memory in the system
>>>> as follow:
>>>>
>>>> . > 70% : unlimited. Courtesy clients are allowed to remain valid
>>>> as long as memory availability is above 70%
>>>> . 60% - 70%: 1 day.
>>>> . 50% - 60%: 1hr
>>>> . 40% - 50%: 30mins
>>>> . 30% - 40%: 15mins
>>>> . < 30%: disable. Expire all existing courtesy clients and donot
>>>> allow new courtesey client
>>> I thought our plan was to add a shrinker to do this.
>> I'm not familiar with kernel's memory allocation and don't want to muck
>> with it so I start with this simple approach but I'm open for any suggestion
>> on how to add a shrinker for this task. Is there any existing model that I
>> can use as reference?
> Fortunately there's nothing complicated about using a shrinker.
> Look for register_shrinker() calls to see code examples. There
> are two already in NFSD itself.

Thanks Chuck, I'll take a look.

-Dai

>
>
> --
> Chuck Lever
>
>
>
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9409a0dc1b76..a7feea9d07cf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5788,12 +5788,47 @@  nfs4_anylock_blockers(struct nfs4_client *clp)
 	return false;
 }
 
+static bool
+nfs4_allow_courtesy_client(struct nfsd_net *nn, unsigned int *idle_timeout)
+{
+	unsigned long avail;
+	bool ret = true;
+	unsigned int courtesy_expire = 0;
+	struct sysinfo si;
+
+	si_meminfo(&si);
+	avail = (si.freeram * 10) / (si.totalram - si.totalhigh);
+	switch (avail) {
+	case 7: case 8: case 9: case 10:
+		courtesy_expire = 0;		/* unlimit */
+		break;
+	case 6:
+		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1DAY;
+		break;
+	case 5:
+		courtesy_expire = NFSD_COURTESY_CLIENT_TO_1HR;
+		break;
+	case 4:
+		courtesy_expire = NFSD_COURTESY_CLIENT_TO_30MINS;
+		break;
+	case 3:
+		courtesy_expire = NFSD_COURTESY_CLIENT_TO_15MINS;
+		break;
+	default:
+		ret = false;			/* disallow CC */
+	}
+	*idle_timeout = courtesy_expire;
+	return ret;
+}
+
 static void
 nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
 				struct laundry_time *lt)
 {
 	struct list_head *pos, *next;
 	struct nfs4_client *clp;
+	unsigned int exptime;
+	bool allow_cc = nfs4_allow_courtesy_client(nn, &exptime);
 
 	INIT_LIST_HEAD(reaplist);
 	spin_lock(&nn->client_lock);
@@ -5803,11 +5838,13 @@  nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
 			goto exp_client;
 		if (!state_expired(lt, clp->cl_time))
 			break;
+		if (!allow_cc)
+			goto exp_client;
 		if (!atomic_read(&clp->cl_rpc_users))
 			clp->cl_state = NFSD4_COURTESY;
 		if (!client_has_state(clp) ||
-				ktime_get_boottime_seconds() >=
-				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
+				(exptime && ktime_get_boottime_seconds() >=
+				(clp->cl_time + exptime)))
 			goto exp_client;
 		if (nfs4_anylock_blockers(clp)) {
 exp_client:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847b482155ae..9d4a5708f852 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -340,7 +340,10 @@  void		nfsd_lockd_shutdown(void);
 #define COMPOUND_ERR_SLACK_SPACE	16     /* OP_SETATTR */
 
 #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
-#define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
+#define	NFSD_COURTESY_CLIENT_TO_1DAY	(24 * 60 * 60)	/* seconds */
+#define	NFSD_COURTESY_CLIENT_TO_1HR	(60 * 60)
+#define	NFSD_COURTESY_CLIENT_TO_30MINS	(30 * 60)
+#define	NFSD_COURTESY_CLIENT_TO_15MINS	(15 * 60)
 
 /*
  * The following attributes are currently not supported by the NFSv4 server: