Message ID | 1663085170-23136-3-git-send-email-dai.ngo@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | NFSD: memory shrinker for NFSv4 clients | expand |
> On Sep 13, 2022, at 9:06 AM, Dai Ngo <dai.ngo@oracle.com> wrote: > > Add courtesy_client_reaper to react to low memory condition triggered > by the system memory shrinker. > > The delayed_work for the courtesy_client_reaper is scheduled on > the shrinker's count callback using the laundry_wq. > > The shrinker's scan callback is not used for expiring the courtesy > clients due to potential deadlocks. > > The courtesy_client_reaper rechedules itself to run if low memory > condition persits and there are more courtesy clients in the system. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/netns.h | 3 ++ > fs/nfsd/nfs4state.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++----- > fs/nfsd/nfsctl.c | 6 ++-- > fs/nfsd/nfsd.h | 7 ++-- > 4 files changed, 106 insertions(+), 12 deletions(-) > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index 55c7006d6109..37457b104eee 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -194,6 +194,9 @@ struct nfsd_net { > int nfs4_max_clients; > > atomic_t nfsd_courtesy_clients; > + atomic_t nfsd_client_shrinker_cb_count; Now that you have a separate function to handle courtesy client reaping, please get rid of nfsd_client_shrinker_cb_count. 
> + struct shrinker nfsd_client_shrinker; > + struct delayed_work nfsd_shrinker_work; > }; > > /* Simple check to find out if a given net was properly initialized */ > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 3af4fc5241b2..fed4ca3fb581 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -4347,7 +4347,28 @@ nfsd4_init_slabs(void) > return -ENOMEM; > } > > -void nfsd4_init_leases_net(struct nfsd_net *nn) > +static unsigned long > +nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) > +{ > + int cnt; > + struct nfsd_net *nn = container_of(shrink, > + struct nfsd_net, nfsd_client_shrinker); > + > + atomic_inc(&nn->nfsd_client_shrinker_cb_count); > + cnt = atomic_read(&nn->nfsd_courtesy_clients); > + if (cnt > 0) > + mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); > + return (unsigned long)cnt; > +} > + > +static unsigned long > +nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) > +{ > + return SHRINK_STOP; > +} > + > +int > +nfsd4_init_leases_net(struct nfsd_net *nn) > { > struct sysinfo si; > u64 max_clients; > @@ -4368,6 +4389,17 @@ void nfsd4_init_leases_net(struct nfsd_net *nn) > nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); > > atomic_set(&nn->nfsd_courtesy_clients, 0); > + atomic_set(&nn->nfsd_client_shrinker_cb_count, 0); > + nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; > + nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; > + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; > + return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); > +} > + > +void > +nfsd4_leases_net_shutdown(struct nfsd_net *nn) > +{ > + unregister_shrinker(&nn->nfsd_client_shrinker); > } > > static void init_nfs4_replay(struct nfs4_replay *rp) > @@ -5909,10 +5941,50 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, > spin_unlock(&nn->client_lock); > } > > +static void > 
+nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, > + struct list_head *reaplist) > +{ > + unsigned int maxreap = 0, reapcnt = 0; > + struct list_head *pos, *next; > + struct nfs4_client *clp; > + > + maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; > + atomic_set(&nn->nfsd_client_shrinker_cb_count, 0); > + INIT_LIST_HEAD(reaplist); > + > + spin_lock(&nn->client_lock); > + list_for_each_safe(pos, next, &nn->client_lru) { > + clp = list_entry(pos, struct nfs4_client, cl_lru); > + if (clp->cl_state == NFSD4_ACTIVE) > + break; > + if (reapcnt >= maxreap) > + break; > + if (!mark_client_expired_locked(clp)) { > + list_add(&clp->cl_lru, reaplist); > + reapcnt++; > + } > + } > + spin_unlock(&nn->client_lock); > +} > + > +static inline void > +nfs4_process_client_reaplist(struct list_head *reaplist) > +{ > + struct list_head *pos, *next; > + struct nfs4_client *clp; > + > + list_for_each_safe(pos, next, reaplist) { > + clp = list_entry(pos, struct nfs4_client, cl_lru); > + trace_nfsd_clid_purged(&clp->cl_clientid); > + list_del_init(&clp->cl_lru); > + expire_client(clp); > + } > +} > + > static time64_t > nfs4_laundromat(struct nfsd_net *nn) > { > - struct nfs4_client *clp; > struct nfs4_openowner *oo; > struct nfs4_delegation *dp; > struct nfs4_ol_stateid *stp; > @@ -5941,12 +6013,8 @@ nfs4_laundromat(struct nfsd_net *nn) > } > spin_unlock(&nn->s2s_cp_lock); > nfs4_get_client_reaplist(nn, &reaplist, &lt); > - list_for_each_safe(pos, next, &reaplist) { > - clp = list_entry(pos, struct nfs4_client, cl_lru); > - trace_nfsd_clid_purged(&clp->cl_clientid); > - list_del_init(&clp->cl_lru); > - expire_client(clp); > - } > + nfs4_process_client_reaplist(&reaplist); > + > spin_lock(&state_lock); > list_for_each_safe(pos, next, &nn->del_recall_lru) { > dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); > @@ -6029,6 +6097,23 @@ laundromat_main(struct work_struct *laundry) > queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); > } > > +static void > 
+courtesy_client_reaper(struct work_struct *reaper) > +{ > + struct list_head reaplist; > + struct delayed_work *dwork = to_delayed_work(reaper); > + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, > + nfsd_shrinker_work); > + > + nfs4_get_courtesy_client_reaplist(nn, &reaplist); > + nfs4_process_client_reaplist(&reaplist); > + if (atomic_read(&nn->nfsd_client_shrinker_cb_count) > 0 && > + atomic_read(&nn->nfsd_courtesy_clients) > 0) { > + queue_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, > + NFSD_CLIENT_SHRINKER_MINTIMEOUT * HZ); IIUC, the count_objects callback will schedule reaping again if it should be necessary. In fact, I wonder if it's possible for count_objects to schedule this nn just as we're calling queue_delayed_work() here -- that would corrupt the list of work queue items, I would think. I don't think we want a recursive invocation here -- you can get rid of this queue_delayed_work and CLIENT_SHRINKER_MINTIMEOUT. > + } > +} > + > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) > { > if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) > @@ -7845,6 +7930,7 @@ static int nfs4_state_create_net(struct net *net) > INIT_LIST_HEAD(&nn->blocked_locks_lru); > > INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); > + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); > get_net(net); > > return 0; > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 917fa1892fd2..597a26ad4183 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -1481,11 +1481,12 @@ static __net_init int nfsd_init_net(struct net *net) > goto out_idmap_error; > nn->nfsd_versions = NULL; > nn->nfsd4_minorversions = NULL; > + retval = nfsd4_init_leases_net(nn); > + if (retval) > + goto out_drc_error; > retval = nfsd_reply_cache_init(nn); > if (retval) > goto out_drc_error; > - nfsd4_init_leases_net(nn); > - > get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); > seqlock_init(&nn->writeverf_lock); > > 
@@ -1507,6 +1508,7 @@ static __net_exit void nfsd_exit_net(struct net *net) > nfsd_idmap_shutdown(net); > nfsd_export_shutdown(net); > nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); > + nfsd4_leases_net_shutdown(nn); > } > > static struct pernet_operations nfsd_net_ops = { > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 57a468ed85c3..cd92f615faa3 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -343,6 +343,7 @@ void nfsd_lockd_shutdown(void); > #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ > #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 > #define NFS4_CLIENTS_PER_GB 1024 > +#define NFSD_CLIENT_SHRINKER_MINTIMEOUT 1 /* seconds */ > > /* > * The following attributes are currently not supported by the NFSv4 server: > @@ -498,7 +499,8 @@ extern void unregister_cld_notifier(void); > extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); > #endif > > -extern void nfsd4_init_leases_net(struct nfsd_net *nn); > +extern int nfsd4_init_leases_net(struct nfsd_net *nn); > +extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); > > #else /* CONFIG_NFSD_V4 */ > static inline int nfsd4_is_junction(struct dentry *dentry) > @@ -506,7 +508,8 @@ static inline int nfsd4_is_junction(struct dentry *dentry) > return 0; > } > > -static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {}; > +static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; > +static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; > > #define register_cld_notifier() 0 > #define unregister_cld_notifier() do { } while(0) > -- > 2.9.5 > -- Chuck Lever
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 55c7006d6109..37457b104eee 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -194,6 +194,9 @@ struct nfsd_net { int nfs4_max_clients; atomic_t nfsd_courtesy_clients; + atomic_t nfsd_client_shrinker_cb_count; + struct shrinker nfsd_client_shrinker; + struct delayed_work nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3af4fc5241b2..fed4ca3fb581 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4347,7 +4347,28 @@ nfsd4_init_slabs(void) return -ENOMEM; } -void nfsd4_init_leases_net(struct nfsd_net *nn) +static unsigned long +nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cnt; + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_client_shrinker); + + atomic_inc(&nn->nfsd_client_shrinker_cb_count); + cnt = atomic_read(&nn->nfsd_courtesy_clients); + if (cnt > 0) + mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + return (unsigned long)cnt; +} + +static unsigned long +nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return SHRINK_STOP; +} + +int +nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; u64 max_clients; @@ -4368,6 +4389,17 @@ void nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); + atomic_set(&nn->nfsd_client_shrinker_cb_count, 0); + nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; + nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); +} + +void +nfsd4_leases_net_shutdown(struct nfsd_net *nn) +{ + unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -5909,10 +5941,50 @@ 
nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, spin_unlock(&nn->client_lock); } +static void +nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, + struct list_head *reaplist) +{ + unsigned int maxreap = 0, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; + atomic_set(&nn->nfsd_client_shrinker_cb_count, 0); + INIT_LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_ACTIVE) + break; + if (reapcnt >= maxreap) + break; + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static inline void +nfs4_process_client_reaplist(struct list_head *reaplist) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + + list_for_each_safe(pos, next, reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + trace_nfsd_clid_purged(&clp->cl_clientid); + list_del_init(&clp->cl_lru); + expire_client(clp); + } +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { - struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; @@ -5941,12 +6013,8 @@ nfs4_laundromat(struct nfsd_net *nn) } spin_unlock(&nn->s2s_cp_lock); nfs4_get_client_reaplist(nn, &reaplist, &lt); - list_for_each_safe(pos, next, &reaplist) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - trace_nfsd_clid_purged(&clp->cl_clientid); - list_del_init(&clp->cl_lru); - expire_client(clp); - } + nfs4_process_client_reaplist(&reaplist); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); @@ -6029,6 +6097,23 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } +static void +courtesy_client_reaper(struct 
work_struct *reaper) +{ + struct list_head reaplist; + struct delayed_work *dwork = to_delayed_work(reaper); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + nfsd_shrinker_work); + + nfs4_get_courtesy_client_reaplist(nn, &reaplist); + nfs4_process_client_reaplist(&reaplist); + if (atomic_read(&nn->nfsd_client_shrinker_cb_count) > 0 && + atomic_read(&nn->nfsd_courtesy_clients) > 0) { + queue_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, + NFSD_CLIENT_SHRINKER_MINTIMEOUT * HZ); + } +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -7845,6 +7930,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); get_net(net); return 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 917fa1892fd2..597a26ad4183 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1481,11 +1481,12 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + retval = nfsd4_init_leases_net(nn); + if (retval) + goto out_drc_error; retval = nfsd_reply_cache_init(nn); if (retval) goto out_drc_error; - nfsd4_init_leases_net(nn); - get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); @@ -1507,6 +1508,7 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); + nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 57a468ed85c3..cd92f615faa3 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -343,6 +343,7 @@ void nfsd_lockd_shutdown(void); #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ #define 
NFSD_CLIENT_MAX_TRIM_PER_RUN 128 #define NFS4_CLIENTS_PER_GB 1024 +#define NFSD_CLIENT_SHRINKER_MINTIMEOUT 1 /* seconds */ /* * The following attributes are currently not supported by the NFSv4 server: @@ -498,7 +499,8 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern void nfsd4_init_leases_net(struct nfsd_net *nn); +extern int nfsd4_init_leases_net(struct nfsd_net *nn); +extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -506,7 +508,8 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {}; +static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; +static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0)
Add courtesy_client_reaper to react to a low memory condition triggered by the system memory shrinker. The delayed_work for the courtesy_client_reaper is scheduled on the shrinker's count callback using the laundry_wq. The shrinker's scan callback is not used for expiring the courtesy clients due to potential deadlocks. The courtesy_client_reaper reschedules itself to run if the low memory condition persists and there are more courtesy clients in the system. Signed-off-by: Dai Ngo <dai.ngo@oracle.com> --- fs/nfsd/netns.h | 3 ++ fs/nfsd/nfs4state.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++----- fs/nfsd/nfsctl.c | 6 ++-- fs/nfsd/nfsd.h | 7 ++-- 4 files changed, 106 insertions(+), 12 deletions(-)