diff mbox series

[v4,2/3] NFSD: add delegation shrinker to react to low memory condition

Message ID 1668053831-7662-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series add support for CB_RECALL_ANY and the delegation shrinker | expand

Commit Message

Dai Ngo Nov. 10, 2022, 4:17 a.m. UTC
The delegation shrinker is scheduled to run on every shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5 seconds delay.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/netns.h     |   3 ++
 fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h     |   8 ++++
 3 files changed, 125 insertions(+), 1 deletion(-)

Comments

Chuck Lever III Nov. 11, 2022, 3:25 p.m. UTC | #1
> On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
> 
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> fs/nfsd/netns.h     |   3 ++
> fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h     |   8 ++++
> 3 files changed, 125 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> 	atomic_t		nfsd_courtesy_clients;
> 	struct shrinker		nfsd_client_shrinker;
> 	struct delayed_work	nfsd_shrinker_work;
> +
> +	struct shrinker		nfsd_deleg_shrinker;
> +	struct delayed_work	nfsd_deleg_shrinker_work;
> };
> 
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..813cdb67b370 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> 	kfree(clp->cl_nii_domain.data);
> 	kfree(clp->cl_nii_name.data);
> 	idr_destroy(&clp->cl_stateids);
> +	kfree(clp->cl_ra);
> 	kmem_cache_free(client_slab, clp);
> }
> 
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> 	[3] = {""},
> };
> 
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> +				struct rpc_task *task)
> +{
> +	switch (task->tk_status) {
> +	case -NFS4ERR_DELAY:
> +		rpc_delay(task, 2 * HZ);
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> +	struct nfs4_client *clp = cb->cb_clp;
> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> +	spin_lock(&nn->client_lock);
> +	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> +	put_client_renew_locked(clp);
> +	spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> +	.done		= nfsd4_cb_recall_any_done,
> +	.release	= nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> 		free_client(clp);
> 		return NULL;
> 	}
> +	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> +	if (!clp->cl_ra) {
> +		free_client(clp);
> +		return NULL;
> +	}
> +	clp->cl_ra_time = 0;
> +	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> +			NFSPROC4_CLNT_CB_RECALL_ANY);
> 	return clp;
> }
> 
> @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> 	return SHRINK_STOP;
> }
> 
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	unsigned long cnt;

No reason not to spell out "count".

> +	struct nfsd_net *nn = container_of(shrink,
> +				struct nfsd_net, nfsd_deleg_shrinker);
> +
> +	cnt = atomic_long_read(&num_delegations);
> +	if (cnt)
> +		mod_delayed_work(laundry_wq,
> +			&nn->nfsd_deleg_shrinker_work, 0);
> +	return cnt;
> +}
> +
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> 	struct sysinfo si;
> 	u64 max_clients;
> +	int retval;
> 
> 	nn->nfsd4_lease = 90;	/* default lease time */
> 	nn->nfsd4_grace = 90;
> @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> +	if (retval)
> +		return retval;
> +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> +	if (retval)
> +		unregister_shrinker(&nn->nfsd_client_shrinker);
> +	return retval;
> +
> }
> 
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> 	unregister_shrinker(&nn->nfsd_client_shrinker);
> +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
> 
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> 	nfs4_process_client_reaplist(&reaplist);
> }
> 
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> +	struct list_head *pos, *next;
> +	struct nfs4_client *clp;
> +	struct list_head cblist;
> +	struct delayed_work *dwork = to_delayed_work(deleg_work);
> +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> +					nfsd_deleg_shrinker_work);
> +
> +	INIT_LIST_HEAD(&cblist);
> +	spin_lock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &nn->client_lru) {
> +		clp = list_entry(pos, struct nfs4_client, cl_lru);
> +		if (clp->cl_state != NFSD4_ACTIVE ||
> +			list_empty(&clp->cl_delegations) ||
> +			atomic_read(&clp->cl_delegs_in_recall) ||
> +			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> +			(ktime_get_boottime_seconds() -
> +				clp->cl_ra_time < 5)) {
> +			continue;
> +		}
> +		list_add(&clp->cl_ra_cblist, &cblist);
> +
> +		/* release in nfsd4_cb_recall_any_release */
> +		atomic_inc(&clp->cl_rpc_users);
> +		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> +		clp->cl_ra_time = ktime_get_boottime_seconds();
> +	}
> +	spin_unlock(&nn->client_lock);
> +	list_for_each_safe(pos, next, &cblist) {

The usual idiom for draining a list like this is

	while (!list_empty(&cblist)) {
		clp = list_first_entry( ... );


> +		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> +		list_del_init(&clp->cl_ra_cblist);
> +		clp->cl_ra->ra_keep = 0;
> +		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> +		nfsd4_run_cb(&clp->cl_ra->ra_cb);
> +	}
> +
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> 
> 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> 	get_net(net);
> 
> 	return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
> 					 1 << NFSD4_CLIENT_CB_KILL)
> +#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
> 	unsigned long		cl_flags;
> 	const struct cred	*cl_cb_cred;
> 	struct rpc_clnt		*cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
> 
> 	unsigned int		cl_state;
> 	atomic_t		cl_delegs_in_recall;
> +
> +	struct nfsd4_cb_recall_any	*cl_ra;
> +	time64_t		cl_ra_time;
> +	struct list_head	cl_ra_cblist;
> };
> 
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> 	NFSPROC4_CLNT_CB_RECALL_ANY,
> };
> 
> +#define	RCA4_TYPE_MASK_RDATA_DLG	0
> +#define	RCA4_TYPE_MASK_WDATA_DLG	1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> -- 
> 2.9.5
> 

--
Chuck Lever
Jeff Layton Nov. 11, 2022, 3:58 p.m. UTC | #2
On Fri, 2022-11-11 at 15:25 +0000, Chuck Lever III wrote:
> 
> > On Nov 9, 2022, at 11:17 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> > 
> > The delegation shrinker is scheduled to run on every shrinker's
> > 'count' callback. It scans the client list and sends the
> > courtesy CB_RECALL_ANY to the clients that hold delegations.
> > 
> > To avoid flooding the clients with CB_RECALL_ANY requests, the
> > delegation shrinker is scheduled to run after a 5 seconds delay.
> > 
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > ---
> > fs/nfsd/netns.h     |   3 ++
> > fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/state.h     |   8 ++++
> > 3 files changed, 125 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 8c854ba3285b..394a05fb46d8 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -196,6 +196,9 @@ struct nfsd_net {
> > 	atomic_t		nfsd_courtesy_clients;
> > 	struct shrinker		nfsd_client_shrinker;
> > 	struct delayed_work	nfsd_shrinker_work;
> > +
> > +	struct shrinker		nfsd_deleg_shrinker;
> > +	struct delayed_work	nfsd_deleg_shrinker_work;
> > };
> > 
> > /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 4e718500a00c..813cdb67b370 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> > 	kfree(clp->cl_nii_domain.data);
> > 	kfree(clp->cl_nii_name.data);
> > 	idr_destroy(&clp->cl_stateids);
> > +	kfree(clp->cl_ra);
> > 	kmem_cache_free(client_slab, clp);
> > }
> > 
> > @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> > 	[3] = {""},
> > };
> > 
> > +static int
> > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > +				struct rpc_task *task)
> > +{
> > +	switch (task->tk_status) {
> > +	case -NFS4ERR_DELAY:
> > +		rpc_delay(task, 2 * HZ);
> > +		return 0;
> > +	default:
> > +		return 1;
> > +	}
> > +}
> > +
> > +static void
> > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > +{
> > +	struct nfs4_client *clp = cb->cb_clp;
> > +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> > +
> > +	spin_lock(&nn->client_lock);
> > +	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > +	put_client_renew_locked(clp);
> > +	spin_unlock(&nn->client_lock);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > +	.done		= nfsd4_cb_recall_any_done,
> > +	.release	= nfsd4_cb_recall_any_release,
> > +};
> > +
> > static struct nfs4_client *create_client(struct xdr_netobj name,
> > 		struct svc_rqst *rqstp, nfs4_verifier *verf)
> > {
> > @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > 		free_client(clp);
> > 		return NULL;
> > 	}
> > +	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> > +	if (!clp->cl_ra) {
> > +		free_client(clp);
> > +		return NULL;
> > +	}
> > +	clp->cl_ra_time = 0;
> > +	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> > +			NFSPROC4_CLNT_CB_RECALL_ANY);
> > 	return clp;
> > }
> > 
> > @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> > 	return SHRINK_STOP;
> > }
> > 
> > +static unsigned long
> > +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > +	unsigned long cnt;
> 
> No reason not to spell out "count".
> 
> > +	struct nfsd_net *nn = container_of(shrink,
> > +				struct nfsd_net, nfsd_deleg_shrinker);
> > +
> > +	cnt = atomic_long_read(&num_delegations);
> > +	if (cnt)
> > +		mod_delayed_work(laundry_wq,
> > +			&nn->nfsd_deleg_shrinker_work, 0);
> > +	return cnt;
> > +}
> > +
> > +static unsigned long
> > +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > +	return SHRINK_STOP;
> > +}
> > +
> > int
> > nfsd4_init_leases_net(struct nfsd_net *nn)
> > {
> > 	struct sysinfo si;
> > 	u64 max_clients;
> > +	int retval;
> > 
> > 	nn->nfsd4_lease = 90;	/* default lease time */
> > 	nn->nfsd4_grace = 90;
> > @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> > 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> > 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> > 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> > -	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > +	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > +	if (retval)
> > +		return retval;
> > +	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> > +	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> > +	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> > +	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> > +	if (retval)
> > +		unregister_shrinker(&nn->nfsd_client_shrinker);
> > +	return retval;
> > +
> > }
> > 
> > void
> > nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> > {
> > 	unregister_shrinker(&nn->nfsd_client_shrinker);
> > +	unregister_shrinker(&nn->nfsd_deleg_shrinker);
> > }
> > 
> > static void init_nfs4_replay(struct nfs4_replay *rp)
> > @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> > 	nfs4_process_client_reaplist(&reaplist);
> > }
> > 
> > +static void
> > +deleg_reaper(struct work_struct *deleg_work)
> > +{
> > +	struct list_head *pos, *next;
> > +	struct nfs4_client *clp;
> > +	struct list_head cblist;
> > +	struct delayed_work *dwork = to_delayed_work(deleg_work);
> > +	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> > +					nfsd_deleg_shrinker_work);
> > +
> > +	INIT_LIST_HEAD(&cblist);
> > +	spin_lock(&nn->client_lock);
> > +	list_for_each_safe(pos, next, &nn->client_lru) {
> > +		clp = list_entry(pos, struct nfs4_client, cl_lru);
> > +		if (clp->cl_state != NFSD4_ACTIVE ||
> > +			list_empty(&clp->cl_delegations) ||
> > +			atomic_read(&clp->cl_delegs_in_recall) ||
> > +			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> > +			(ktime_get_boottime_seconds() -
> > +				clp->cl_ra_time < 5)) {
> > +			continue;
> > +		}
> > +		list_add(&clp->cl_ra_cblist, &cblist);
> > +
> > +		/* release in nfsd4_cb_recall_any_release */
> > +		atomic_inc(&clp->cl_rpc_users);
> > +		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > +		clp->cl_ra_time = ktime_get_boottime_seconds();
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	list_for_each_safe(pos, next, &cblist) {
> 
> The usual idiom for draining a list like this is
> 
> 	while (!list_empty(&cblist)) {
> 		clp = list_first_entry( ... );
> 

Agreed. Note that it's also slightly more efficient to do it that way,
since you don't require a "next" pointer.

> 
> > +		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> > +		list_del_init(&clp->cl_ra_cblist);
> > +		clp->cl_ra->ra_keep = 0;
> > +		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> > +						BIT(RCA4_TYPE_MASK_WDATA_DLG);
> > +		nfsd4_run_cb(&clp->cl_ra->ra_cb);
> > +	}
> > +
> > +}
> > +
> > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> > {
> > 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> > @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> > 
> > 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> > 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> > +	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> > 	get_net(net);
> > 
> > 	return 0;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 6b33cbbe76d3..12ce9792c5b6 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -368,6 +368,7 @@ struct nfs4_client {
> > #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
> > #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
> > 					 1 << NFSD4_CLIENT_CB_KILL)
> > +#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
> > 	unsigned long		cl_flags;
> > 	const struct cred	*cl_cb_cred;
> > 	struct rpc_clnt		*cl_cb_client;
> > @@ -411,6 +412,10 @@ struct nfs4_client {
> > 
> > 	unsigned int		cl_state;
> > 	atomic_t		cl_delegs_in_recall;
> > +
> > +	struct nfsd4_cb_recall_any	*cl_ra;
> > +	time64_t		cl_ra_time;
> > +	struct list_head	cl_ra_cblist;
> > };
> > 
> > /* struct nfs4_client_reset
> > @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> > 	NFSPROC4_CLNT_CB_RECALL_ANY,
> > };
> > 
> > +#define	RCA4_TYPE_MASK_RDATA_DLG	0
> > +#define	RCA4_TYPE_MASK_WDATA_DLG	1
> > +
> > /* Returns true iff a is later than b: */
> > static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > {
> > -- 
> > 2.9.5
> > 
> 
> --
> Chuck Lever
> 
> 
>
diff mbox series

Patch

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@  struct nfsd_net {
 	atomic_t		nfsd_courtesy_clients;
 	struct shrinker		nfsd_client_shrinker;
 	struct delayed_work	nfsd_shrinker_work;
+
+	struct shrinker		nfsd_deleg_shrinker;
+	struct delayed_work	nfsd_deleg_shrinker_work;
 };
 
 /* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..813cdb67b370 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@  static void __free_client(struct kref *k)
 	kfree(clp->cl_nii_domain.data);
 	kfree(clp->cl_nii_name.data);
 	idr_destroy(&clp->cl_stateids);
+	kfree(clp->cl_ra);
 	kmem_cache_free(client_slab, clp);
 }
 
@@ -2854,6 +2855,36 @@  static const struct tree_descr client_files[] = {
 	[3] = {""},
 };
 
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+				struct rpc_task *task)
+{
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+		rpc_delay(task, 2 * HZ);
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+	struct nfs4_client *clp = cb->cb_clp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+	spin_lock(&nn->client_lock);
+	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+	put_client_renew_locked(clp);
+	spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+	.done		= nfsd4_cb_recall_any_done,
+	.release	= nfsd4_cb_recall_any_release,
+};
+
 static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
@@ -2891,6 +2922,14 @@  static struct nfs4_client *create_client(struct xdr_netobj name,
 		free_client(clp);
 		return NULL;
 	}
+	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+	if (!clp->cl_ra) {
+		free_client(clp);
+		return NULL;
+	}
+	clp->cl_ra_time = 0;
+	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+			NFSPROC4_CLNT_CB_RECALL_ANY);
 	return clp;
 }
 
@@ -4365,11 +4404,32 @@  nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
 	return SHRINK_STOP;
 }
 
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long cnt;
+	struct nfsd_net *nn = container_of(shrink,
+				struct nfsd_net, nfsd_deleg_shrinker);
+
+	cnt = atomic_long_read(&num_delegations);
+	if (cnt)
+		mod_delayed_work(laundry_wq,
+			&nn->nfsd_deleg_shrinker_work, 0);
+	return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	return SHRINK_STOP;
+}
+
 int
 nfsd4_init_leases_net(struct nfsd_net *nn)
 {
 	struct sysinfo si;
 	u64 max_clients;
+	int retval;
 
 	nn->nfsd4_lease = 90;	/* default lease time */
 	nn->nfsd4_grace = 90;
@@ -4390,13 +4450,24 @@  nfsd4_init_leases_net(struct nfsd_net *nn)
 	nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
 	nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
 	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-	return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+	if (retval)
+		return retval;
+	nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+	nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+	nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+	retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+	if (retval)
+		unregister_shrinker(&nn->nfsd_client_shrinker);
+	return retval;
+
 }
 
 void
 nfsd4_leases_net_shutdown(struct nfsd_net *nn)
 {
 	unregister_shrinker(&nn->nfsd_client_shrinker);
+	unregister_shrinker(&nn->nfsd_deleg_shrinker);
 }
 
 static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6135,6 +6206,47 @@  courtesy_client_reaper(struct work_struct *reaper)
 	nfs4_process_client_reaplist(&reaplist);
 }
 
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+	struct list_head *pos, *next;
+	struct nfs4_client *clp;
+	struct list_head cblist;
+	struct delayed_work *dwork = to_delayed_work(deleg_work);
+	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+					nfsd_deleg_shrinker_work);
+
+	INIT_LIST_HEAD(&cblist);
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (clp->cl_state != NFSD4_ACTIVE ||
+			list_empty(&clp->cl_delegations) ||
+			atomic_read(&clp->cl_delegs_in_recall) ||
+			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+			(ktime_get_boottime_seconds() -
+				clp->cl_ra_time < 5)) {
+			continue;
+		}
+		list_add(&clp->cl_ra_cblist, &cblist);
+
+		/* release in nfsd4_cb_recall_any_release */
+		atomic_inc(&clp->cl_rpc_users);
+		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+		clp->cl_ra_time = ktime_get_boottime_seconds();
+	}
+	spin_unlock(&nn->client_lock);
+	list_for_each_safe(pos, next, &cblist) {
+		clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
+		list_del_init(&clp->cl_ra_cblist);
+		clp->cl_ra->ra_keep = 0;
+		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+						BIT(RCA4_TYPE_MASK_WDATA_DLG);
+		nfsd4_run_cb(&clp->cl_ra->ra_cb);
+	}
+
+}
+
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 {
 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,6 +8070,7 @@  static int nfs4_state_create_net(struct net *net)
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
 	INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+	INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
 	get_net(net);
 
 	return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@  struct nfs4_client {
 #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
+#define	NFSD4_CLIENT_CB_RECALL_ANY	(6)
 	unsigned long		cl_flags;
 	const struct cred	*cl_cb_cred;
 	struct rpc_clnt		*cl_cb_client;
@@ -411,6 +412,10 @@  struct nfs4_client {
 
 	unsigned int		cl_state;
 	atomic_t		cl_delegs_in_recall;
+
+	struct nfsd4_cb_recall_any	*cl_ra;
+	time64_t		cl_ra_time;
+	struct list_head	cl_ra_cblist;
 };
 
 /* struct nfs4_client_reset
@@ -642,6 +647,9 @@  enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_RECALL_ANY,
 };
 
+#define	RCA4_TYPE_MASK_RDATA_DLG	0
+#define	RCA4_TYPE_MASK_WDATA_DLG	1
+
 /* Returns true iff a is later than b: */
 static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
 {