diff mbox series

[2/2] NFSD: add delegation shrinker to react to low memory condition

Message ID 1666070139-18843-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series add support for CB_RECALL_ANY and the delegation shrinker | expand

Commit Message

Dai Ngo Oct. 18, 2022, 5:15 a.m. UTC
The delegation shrinker is scheduled to run on every invocation of the
shrinker's 'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5-second delay.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/netns.h     |  3 +++
 fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h     |  1 +
 3 files changed, 73 insertions(+), 1 deletion(-)

Comments

Jeff Layton Oct. 18, 2022, 1:07 p.m. UTC | #1
On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
> 
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
> 

But...when the kernel needs memory, it generally needs it _now_, and 5s
is a long time. It'd be better to not delay the initial sending of
CB_RECALL_ANY callbacks this much.

Maybe the logic should be changed such that it runs no more frequently
than once every 5s, but don't delay the initial sending of
CB_RECALL_ANYs.

> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/netns.h     |  3 +++
>  fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/nfsd/state.h     |  1 +
>  3 files changed, 73 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
>  	atomic_t		nfsd_courtesy_clients;
>  	struct shrinker		nfsd_client_shrinker;
>  	struct delayed_work	nfsd_shrinker_work;
> +
> +	struct shrinker		nfsd_deleg_shrinker;
> +	struct delayed_work	nfsd_deleg_shrinker_work;
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c60c937dece6..bdaea0e6e72f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>  	return SHRINK_STOP;
>  }
>  
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	unsigned long cnt;
> +	struct nfsd_net *nn = container_of(shrink,
> +				struct nfsd_net, nfsd_deleg_shrinker);
> +
> +	cnt = atomic_long_read(&num_delegations);
> +	if (cnt)
> +		mod_delayed_work(laundry_wq,
> +			&nn->nfsd_deleg_shrinker_work, 5 * HZ);
> +	return cnt;
> +}
> +

What's the rationale for doing all of this in the count callback? Why
not just return the value here and leave scheduling to the scan
callback?

> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	return SHRINK_STOP;
> +}
> +
>  int
>  nfsd4_init_leases_net(struct nfsd_net *nn)
>  {
>  	struct sysinfo si;
>  	u64 max_clients;
> +	int retval;
>  
>  	nn->nfsd4_lease = 90;	/* default lease time */
>  	nn->nfsd4_grace = 90;
> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>  	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>  	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>  	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	if (retval)
> +		return retval;
> +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> +	if (retval)
> +		unregister_shrinker(&nn->nfsd_client_shrinker);
> +	return retval;
>  }
>  
>  void
>  nfsd4_leases_net_shutdown(struct nfsd_net *nn)
>  {
>  	unregister_shrinker(&nn->nfsd_client_shrinker);
> +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
>  }
>  
>  static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
>  	nfs4_process_client_reaplist(&reaplist);
>  }
>  
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> +	struct list_head *pos, *next;
> +	struct nfs4_client *clp;
> +	struct list_head cblist;
> +	struct delayed_work *dwork = to_delayed_work(deleg_work);
> +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> +					nfsd_deleg_shrinker_work);
> +
> +	INIT_LIST_HEAD(&cblist);
> +	spin_lock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &nn->client_lru) {
> +		clp = list_entry(pos, struct nfs4_client, cl_lru);
> +		if (clp->cl_state != NFSD4_ACTIVE ||
> +				list_empty(&clp->cl_delegations) ||
> +				atomic_read(&clp->cl_delegs_in_recall) ||
> +				clp->cl_recall_any_busy) {
> +			continue;
> +		}
> +		clp->cl_recall_any_busy = true;
> +		list_add(&clp->cl_recall_any_cblist, &cblist);
> +
> +		/* release in nfsd4_cb_recall_any_release */
> +		atomic_inc(&clp->cl_rpc_users);
> +	}
> +	spin_unlock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &cblist) {
> +		clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
> +		list_del_init(&clp->cl_recall_any_cblist);
> +		clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> +		nfsd4_run_cb(&clp->cl_recall_any);
> +	}
> +}
> +
>  static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>  {
>  	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>  
>  	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>  	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
>  	get_net(net);
>  
>  	return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 49ca06169642..05b572ce6605 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -415,6 +415,7 @@ struct nfs4_client {
>  	bool			cl_recall_any_busy;
>  	uint32_t		cl_recall_any_bm;
>  	struct nfsd4_callback	cl_recall_any;
> +	struct list_head	cl_recall_any_cblist;
>  };
>  
>  /* struct nfs4_client_reset
Dai Ngo Oct. 18, 2022, 3:51 p.m. UTC | #2
On 10/18/22 6:07 AM, Jeff Layton wrote:
> On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
>> The delegation shrinker is scheduled to run on every shrinker's
>> 'count' callback. It scans the client list and sends the
>> courtesy CB_RECALL_ANY to the clients that hold delegations.
>>
>> To avoid flooding the clients with CB_RECALL_ANY requests, the
>> delegation shrinker is scheduled to run after a 5 seconds delay.
>>
> But...when the kernel needs memory, it generally needs it _now_, and 5s
> is a long time. It'd be better to not delay the initial sending of
> CB_RECALL_ANY callbacks this much.

yes, makes sense.

>
> Maybe the logic should be changed such that it runs no more frequently
> than once every 5s, but don't delay the initial sending of
> CB_RECALL_ANYs.

I'll make this adjustment.

Thanks,
-Dai

>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/netns.h     |  3 +++
>>   fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   fs/nfsd/state.h     |  1 +
>>   3 files changed, 73 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 8c854ba3285b..394a05fb46d8 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -196,6 +196,9 @@ struct nfsd_net {
>>   	atomic_t		nfsd_courtesy_clients;
>>   	struct shrinker		nfsd_client_shrinker;
>>   	struct delayed_work	nfsd_shrinker_work;
>> +
>> +	struct shrinker		nfsd_deleg_shrinker;
>> +	struct delayed_work	nfsd_deleg_shrinker_work;
>>   };
>>   
>>   /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index c60c937dece6..bdaea0e6e72f 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>>   	return SHRINK_STOP;
>>   }
>>   
>> +static unsigned long
>> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> +	unsigned long cnt;
>> +	struct nfsd_net *nn = container_of(shrink,
>> +				struct nfsd_net, nfsd_deleg_shrinker);
>> +
>> +	cnt = atomic_long_read(&num_delegations);
>> +	if (cnt)
>> +		mod_delayed_work(laundry_wq,
>> +			&nn->nfsd_deleg_shrinker_work, 5 * HZ);
>> +	return cnt;
>> +}
>> +
> What's the rationale for doing all of this in the count callback? Why
> not just return the value here and leave scheduling to the scan
> callback?
>
>> +static unsigned long
>> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> +	return SHRINK_STOP;
>> +}
>> +
>>   int
>>   nfsd4_init_leases_net(struct nfsd_net *nn)
>>   {
>>   	struct sysinfo si;
>>   	u64 max_clients;
>> +	int retval;
>>   
>>   	nn->nfsd4_lease = 90;	/* default lease time */
>>   	nn->nfsd4_grace = 90;
>> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>>   	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>>   	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>>   	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
>> -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> +	if (retval)
>> +		return retval;
>> +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
>> +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
>> +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
>> +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
>> +	if (retval)
>> +		unregister_shrinker(&nn->nfsd_client_shrinker);
>> +	return retval;
>>   }
>>   
>>   void
>>   nfsd4_leases_net_shutdown(struct nfsd_net *nn)
>>   {
>>   	unregister_shrinker(&nn->nfsd_client_shrinker);
>> +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
>>   }
>>   
>>   static void init_nfs4_replay(struct nfs4_replay *rp)
>> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
>>   	nfs4_process_client_reaplist(&reaplist);
>>   }
>>   
>> +static void
>> +deleg_reaper(struct work_struct *deleg_work)
>> +{
>> +	struct list_head *pos, *next;
>> +	struct nfs4_client *clp;
>> +	struct list_head cblist;
>> +	struct delayed_work *dwork = to_delayed_work(deleg_work);
>> +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> +					nfsd_deleg_shrinker_work);
>> +
>> +	INIT_LIST_HEAD(&cblist);
>> +	spin_lock(&nn->client_lock);
>> +	list_for_each_safe(pos, next, &nn->client_lru) {
>> +		clp = list_entry(pos, struct nfs4_client, cl_lru);
>> +		if (clp->cl_state != NFSD4_ACTIVE ||
>> +				list_empty(&clp->cl_delegations) ||
>> +				atomic_read(&clp->cl_delegs_in_recall) ||
>> +				clp->cl_recall_any_busy) {
>> +			continue;
>> +		}
>> +		clp->cl_recall_any_busy = true;
>> +		list_add(&clp->cl_recall_any_cblist, &cblist);
>> +
>> +		/* release in nfsd4_cb_recall_any_release */
>> +		atomic_inc(&clp->cl_rpc_users);
>> +	}
>> +	spin_unlock(&nn->client_lock);
>> +	list_for_each_safe(pos, next, &cblist) {
>> +		clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
>> +		list_del_init(&clp->cl_recall_any_cblist);
>> +		clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> +		nfsd4_run_cb(&clp->cl_recall_any);
>> +	}
>> +}
>> +
>>   static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>>   {
>>   	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
>> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>>   
>>   	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>>   	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
>> +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
>>   	get_net(net);
>>   
>>   	return 0;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 49ca06169642..05b572ce6605 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -415,6 +415,7 @@ struct nfs4_client {
>>   	bool			cl_recall_any_busy;
>>   	uint32_t		cl_recall_any_bm;
>>   	struct nfsd4_callback	cl_recall_any;
>> +	struct list_head	cl_recall_any_cblist;
>>   };
>>   
>>   /* struct nfs4_client_reset
diff mbox series

Patch

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@  struct nfsd_net {
 	atomic_t		nfsd_courtesy_clients;
 	struct shrinker		nfsd_client_shrinker;
 	struct delayed_work	nfsd_shrinker_work;
+
+	struct shrinker		nfsd_deleg_shrinker;
+	struct delayed_work	nfsd_deleg_shrinker_work;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c60c937dece6..bdaea0e6e72f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4392,11 +4392,32 @@  nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return SHRINK_STOP;
 }
 
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long cnt;
+	struct nfsd_net *nn = container_of(shrink,
+				struct nfsd_net, nfsd_deleg_shrinker);
+
+	cnt = atomic_long_read(&num_delegations);
+	if (cnt)
+		mod_delayed_work(laundry_wq,
+			&nn->nfsd_deleg_shrinker_work, 5 * HZ);
+	return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	return SHRINK_STOP;
+}
+
 int
 nfsd4_init_leases_net(struct nfsd_net *nn)
 {
 	struct sysinfo si;
 	u64 max_clients;
+	int retval;
 
 	nn->nfsd4_lease = 90;	/* default lease time */
 	nn->nfsd4_grace = 90;
@@ -4417,13 +4438,23 @@  nfsd4_init_leases_net(struct nfsd_net *nn)
 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	if (retval)
+		return retval;
+	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+	if (retval)
+		unregister_shrinker(&nn->nfsd_client_shrinker);
+	return retval;
 }
 
 void
 nfsd4_leases_net_shutdown(struct nfsd_net *nn)
 {
 	unregister_shrinker(&nn->nfsd_client_shrinker);
+	unregister_shrinker(&nn->nfsd_deleg_shrinker);
 }
 
 static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6162,6 +6193,42 @@  courtesy_client_reaper(struct work_struct *reaper)
 	nfs4_process_client_reaplist(&reaplist);
 }
 
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+	struct list_head *pos, *next;
+	struct nfs4_client *clp;
+	struct list_head cblist;
+	struct delayed_work *dwork = to_delayed_work(deleg_work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					nfsd_deleg_shrinker_work);
+
+	INIT_LIST_HEAD(&cblist);
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (clp->cl_state != NFSD4_ACTIVE ||
+				list_empty(&clp->cl_delegations) ||
+				atomic_read(&clp->cl_delegs_in_recall) ||
+				clp->cl_recall_any_busy) {
+			continue;
+		}
+		clp->cl_recall_any_busy = true;
+		list_add(&clp->cl_recall_any_cblist, &cblist);
+
+		/* release in nfsd4_cb_recall_any_release */
+		atomic_inc(&clp->cl_rpc_users);
+	}
+	spin_unlock(&nn->client_lock);
+	list_for_each_safe(pos, next, &cblist) {
+		clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
+		list_del_init(&clp->cl_recall_any_cblist);
+		clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+						BIT(RCA4_TYPE_MASK_WDATA_DLG);
+		nfsd4_run_cb(&clp->cl_recall_any);
+	}
+}
+
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 {
 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7985,6 +8052,7 @@  static int nfs4_state_create_net(struct net *net)
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
 	get_net(net);
 
 	return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 49ca06169642..05b572ce6605 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -415,6 +415,7 @@  struct nfs4_client {
 	bool			cl_recall_any_busy;
 	uint32_t		cl_recall_any_bm;
 	struct nfsd4_callback	cl_recall_any;
+	struct list_head	cl_recall_any_cblist;
 };
 
 /* struct nfs4_client_reset