| Message ID | 166498178061.1527.15489022568685172014.stgit@manet.1015granger.net |
|---|---|
| State | New, archived |
| Series | A course adjustment, maybe |
> On Oct 5, 2022, at 10:56 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>
> fh_match() is expensive to use for hash chains that contain more
> than a few objects. With common workloads, I see multiple thousands
> of objects stored in file_hashtbl[], which always has only 256
> buckets.
>
> Replace it with an rhashtable, which dynamically resizes its bucket
> array to keep hash chains short.
>
> This also enables the removal of the use of state_lock to serialize
> operations on the new rhashtable.
>
> The result is an improvement in the latency of NFSv4 operations and
> a reduction in nfsd CPU utilization, since walking long hash chains
> in file_hashtbl no longer incurs frequent cache misses.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfsd/nfs4state.c | 229 +++++++++++++++++++++++++++++++++++----------------
>  fs/nfsd/state.h     |   5 -
>  2 files changed, 158 insertions(+), 76 deletions(-)
>
> [ ... diff snipped; the full patch appears at the end of this page.
>   Quoting only the hunk under discussion, from the new
>   find_or_hash_nfs4_file() ... ]
>
> +	fi = rhashtable_lookup_get_insert_key(&nfs4_file_rhashtbl,
> +					      fhp, &new->fi_rhash,
> +					      nfs4_file_rhash_params);
> +	if (!fi) {
> +		fi = new;
> +		goto check_aliases;
> +	}
> +	if (IS_ERR(fi)) /* or BUG? */
> +		return NULL;
> +	if (!refcount_inc_not_zero(&fi->fi_ref))
> +		fi = new;

Ah, hrm. Given what we just had to do to nfsd_file_do_acquire(),
maybe this needs the same fix to hang onto the RCU read lock
while dicking with the nfs4_file object's reference count?

--
Chuck Lever
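For context on the concern raised here: in the quoted hunk, neither the
rhashtable call nor the refcount test runs inside an RCU read-side
critical section, so the looked-up object can be freed between the two
steps. A minimal sketch of the hazard, and of the safe shape that
find_nfs4_file() in the same patch already uses (the interleaving below
is hypothetical; the function names are from the patch):

	/*
	 * The window under discussion:
	 *
	 *	nfsd thread A				nfsd thread B
	 *	-------------				-------------
	 *	fi = rhashtable_lookup_get_insert_key(...);
	 *						put_nfs4_file(fi);
	 *						  refcount_dec_and_test() -> true
	 *						  unhash_nfs4_file(fi);
	 *						  call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
	 *	<no RCU read lock held, so a grace
	 *	 period can elapse and fi can be
	 *	 freed right here>
	 *	refcount_inc_not_zero(&fi->fi_ref);	<- use-after-free
	 */

	/* The safe shape, as find_nfs4_file() does it: hold the RCU
	 * read lock across both the lookup and the refcount bump, so
	 * the grace period cannot complete until the test is done. */
	rcu_read_lock();
	fi = rhashtable_lookup(&nfs4_file_rhashtbl, fhp,
			       nfs4_file_rhash_params);
	if (fi && !refcount_inc_not_zero(&fi->fi_ref))
		fi = NULL;	/* raced with the final put; treat as a miss */
	rcu_read_unlock();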
On Wed, 2022-10-05 at 15:11 +0000, Chuck Lever III wrote:
> > On Oct 5, 2022, at 10:56 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> >
> > fh_match() is expensive to use for hash chains that contain more
> > than a few objects. With common workloads, I see multiple thousands
> > of objects stored in file_hashtbl[], which always has only 256
> > buckets.
> >
> > [ ... patch snipped; quoting only the hunk under discussion ... ]
> >
> > +	if (IS_ERR(fi)) /* or BUG? */
> > +		return NULL;
> > +	if (!refcount_inc_not_zero(&fi->fi_ref))
> > +		fi = new;
>
> Ah, hrm. Given what we just had to do to nfsd_file_do_acquire(),
> maybe this needs the same fix to hang onto the RCU read lock
> while dicking with the nfs4_file object's reference count?
>

Yes. Probably we should just merge this patch if you want a fix for
mainline:

    nfsd: rework hashtable handling in nfsd_do_file_acquire

[ ... remainder of quoted patch snipped ... ]

--
Jeff Layton <jlayton@poochiereds.net>
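The fix Jeff names here reworked nfsd_file_do_acquire() so that the RCU
read lock stays held across the rhashtable insert and the reference
bump, retrying when the lookup returns an entry whose refcount has
already dropped to zero. Applied to the code above, that approach would
look roughly like the sketch below — an editorial paraphrase assuming
the same retry strategy, not the refresh that was actually posted:

	/*
	 * Sketch only: find_or_hash_nfs4_file() reworked along the
	 * lines of the nfsd_file_do_acquire() fix.
	 */
	static struct nfs4_file *
	find_or_hash_nfs4_file(struct nfs4_file *new, const struct svc_fh *fhp)
	{
		struct nfs4_file *fi;

		init_nfs4_file(fhp, new);
	retry:
		rcu_read_lock();
		fi = rhashtable_lookup_get_insert_key(&nfs4_file_rhashtbl,
						      fhp, &new->fi_rhash,
						      nfs4_file_rhash_params);
		if (!fi) {
			/* "new" was inserted; it already holds a reference */
			fi = new;
		} else if (IS_ERR(fi)) {
			/* insertion failed, e.g. -ENOMEM */
			rcu_read_unlock();
			return NULL;
		} else if (!refcount_inc_not_zero(&fi->fi_ref)) {
			/*
			 * Found an entry whose last reference is being
			 * put; wait for it to be unhashed, then retry.
			 */
			rcu_read_unlock();
			goto retry;
		}
		rcu_read_unlock();

		check_nfs4_file_aliases_locked(fi, fhp);
		return fi;
	}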
> On Oct 6, 2022, at 12:12 PM, Jeff Layton <jlayton@poochiereds.net> wrote:
>
> On Wed, 2022-10-05 at 15:11 +0000, Chuck Lever III wrote:
>>
>>> On Oct 5, 2022, at 10:56 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>>>
>>> [ ... patch snipped; quoting only the hunk under discussion ... ]
>>>
>>> +	if (!refcount_inc_not_zero(&fi->fi_ref))
>>> +		fi = new;
>>
>> Ah, hrm. Given what we just had to do to nfsd_file_do_acquire(),
>> maybe this needs the same fix to hang onto the RCU read lock
>> while dicking with the nfs4_file object's reference count?
>>
>
> Yes. Probably we should just merge this patch if you want a fix for
> mainline:
>
>     nfsd: rework hashtable handling in nfsd_do_file_acquire

It's queued up. I intend to submit it before leaving for Westford.

As for the file_hashtbl, I have fixed that up to be consistent with
the approach in nfsd_file_do_acquire(), and will post a refresh in
a moment.

[ ... remainder of quoted patch snipped ... ]

--
Chuck Lever
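One design point worth spelling out before the raw patch below: both
hash functions reduce to nfs4_file_inode_hash(), while obj_cmpfn
matches on the whole filehandle. Two nfs4_file objects with distinct
filehandles for the same inode are therefore distinct table entries
that always land in the same bucket, and that bucket-sharing property
is what lets check_nfs4_file_aliases_locked() find every alias with a
single bucket walk. The helper below is hypothetical and exists purely
to illustrate the property:

	/*
	 * Hypothetical helper, illustration only: same inode => same
	 * hash => same bucket, even though obj_cmpfn (fh_match) keeps
	 * the two objects distinct as table entries.
	 */
	static bool nfs4_files_share_bucket(const struct nfs4_file *a,
					    const struct nfs4_file *b,
					    u32 seed)
	{
		return nfs4_file_inode_hash(a->fi_inode, seed) ==
		       nfs4_file_inode_hash(b->fi_inode, seed);
	}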
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2b850de288cf..06499b9481a6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,7 +44,9 @@
 #include <linux/jhash.h>
 #include <linux/string_helpers.h>
 #include <linux/fsnotify.h>
+#include <linux/rhashtable.h>
 #include <linux/nfs_ssc.h>
+
 #include "xdr4.h"
 #include "xdr4cb.h"
 #include "vfs.h"
@@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
 void nfsd4_end_grace(struct nfsd_net *nn);
 static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
+static void unhash_nfs4_file(struct nfs4_file *fp);
 
 /* Locking: */
 
@@ -577,11 +580,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
 void
 put_nfs4_file(struct nfs4_file *fi)
 {
-	might_lock(&state_lock);
-
-	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
-		hlist_del_rcu(&fi->fi_hash);
-		spin_unlock(&state_lock);
+	if (refcount_dec_and_test(&fi->fi_ref)) {
+		unhash_nfs4_file(fi);
 		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
 		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
 		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
@@ -695,19 +695,85 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
 	return ret & OWNER_HASH_MASK;
 }
 
-/* hash table for nfs4_file */
-#define FILE_HASH_BITS		8
-#define FILE_HASH_SIZE		(1 << FILE_HASH_BITS)
+static struct rhashtable nfs4_file_rhashtbl ____cacheline_aligned_in_smp;
 
-static unsigned int file_hashval(struct svc_fh *fh)
+/*
+ * The returned hash value is based solely on the address of an in-core
+ * inode, a pointer to a slab-allocated object. The entropy in such a
+ * pointer is concentrated in its middle bits.
+ */
+static u32 nfs4_file_inode_hash(const struct inode *inode, u32 seed)
+{
+	unsigned long ptr = (unsigned long)inode;
+	u32 k;
+
+	k = ptr >> L1_CACHE_SHIFT;
+	k &= 0x00ffffff;
+	return jhash2(&k, 1, seed);
+}
+
+/**
+ * nfs4_file_key_hashfn - Compute the hash value of a lookup key
+ * @data: key on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ *   Computed 32-bit hash value
+ */
+static u32 nfs4_file_key_hashfn(const void *data, u32 len, u32 seed)
 {
-	struct inode *inode = d_inode(fh->fh_dentry);
+	const struct svc_fh *fhp = data;
 
-	/* XXX: why not (here & in file cache) use inode? */
-	return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
+	return nfs4_file_inode_hash(d_inode(fhp->fh_dentry), seed);
 }
 
-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+/**
+ * nfs4_file_obj_hashfn - Compute the hash value of an nfs4_file object
+ * @data: object on which to compute the hash value
+ * @len: rhash table's key_len parameter (unused)
+ * @seed: rhash table's random seed of the day
+ *
+ * Return value:
+ *   Computed 32-bit hash value
+ */
+static u32 nfs4_file_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct nfs4_file *fi = data;
+
+	return nfs4_file_inode_hash(fi->fi_inode, seed);
+}
+
+/**
+ * nfs4_file_obj_cmpfn - Match a cache item against search criteria
+ * @arg: search criteria
+ * @ptr: cache item to check
+ *
+ * Return values:
+ *   %0 - Item matches search criteria
+ *   %1 - Item does not match search criteria
+ */
+static int nfs4_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+			       const void *ptr)
+{
+	const struct svc_fh *fhp = arg->key;
+	const struct nfs4_file *fi = ptr;
+
+	return fh_match(&fi->fi_fhandle, &fhp->fh_handle) ? 0 : 1;
+}
+
+static const struct rhashtable_params nfs4_file_rhash_params = {
+	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
+	.key_offset		= offsetof(struct nfs4_file, fi_inode),
+	.head_offset		= offsetof(struct nfs4_file, fi_rhash),
+	.hashfn			= nfs4_file_key_hashfn,
+	.obj_hashfn		= nfs4_file_obj_hashfn,
+	.obj_cmpfn		= nfs4_file_obj_cmpfn,
+
+	/* Reduce resizing churn on light workloads */
+	.min_size		= 512,		/* buckets */
+	.automatic_shrinking	= true,
+};
 
 /*
  * Check if courtesy clients have conflicting access and resolve it if possible
@@ -4251,11 +4317,8 @@ static struct nfs4_file *nfsd4_alloc_file(void)
 }
 
 /* OPEN Share state helper functions */
-static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
-			    struct nfs4_file *fp)
+static void init_nfs4_file(const struct svc_fh *fh, struct nfs4_file *fp)
 {
-	lockdep_assert_held(&state_lock);
-
 	refcount_set(&fp->fi_ref, 1);
 	spin_lock_init(&fp->fi_lock);
 	INIT_LIST_HEAD(&fp->fi_stateids);
@@ -4273,7 +4336,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
 	INIT_LIST_HEAD(&fp->fi_lo_states);
 	atomic_set(&fp->fi_lo_recalls, 0);
 #endif
-	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
 }
 
 void
@@ -4626,71 +4688,84 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 	nfs4_put_stid(&last->st_stid);
 }
 
-/* search file_hashtbl[] for file */
-static struct nfs4_file *
-find_file_locked(struct svc_fh *fh, unsigned int hashval)
+static struct nfs4_file *find_nfs4_file(const struct svc_fh *fhp)
 {
-	struct nfs4_file *fp;
+	struct nfs4_file *fi;
 
-	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
-				 lockdep_is_held(&state_lock)) {
-		if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
-			if (refcount_inc_not_zero(&fp->fi_ref))
-				return fp;
-		}
-	}
-	return NULL;
+	rcu_read_lock();
+	fi = rhashtable_lookup(&nfs4_file_rhashtbl, fhp,
+			       nfs4_file_rhash_params);
+	if (fi)
+		if (!refcount_inc_not_zero(&fi->fi_ref))
+			fi = NULL;
+	rcu_read_unlock();
+	return fi;
 }
 
-static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
-				     unsigned int hashval)
+static void check_nfs4_file_aliases_locked(struct nfs4_file *new,
+					   const struct svc_fh *fhp)
 {
-	struct nfs4_file *fp;
-	struct nfs4_file *ret = NULL;
-	bool alias_found = false;
+	struct rhashtable *ht = &nfs4_file_rhashtbl;
+	struct rhash_lock_head __rcu *const *bkt;
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = fhp,
+	};
+	struct bucket_table *tbl;
+	struct rhash_head *he;
+	unsigned int hash;
 
-	spin_lock(&state_lock);
-	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
-				 lockdep_is_held(&state_lock)) {
-		if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
-			if (refcount_inc_not_zero(&fp->fi_ref))
-				ret = fp;
-		} else if (d_inode(fh->fh_dentry) == fp->fi_inode)
-			fp->fi_aliased = alias_found = true;
-	}
-	if (likely(ret == NULL)) {
-		nfsd4_init_file(fh, hashval, new);
-		new->fi_aliased = alias_found;
-		ret = new;
+	/*
+	 * rhashtable guarantees small buckets, thus this loop stays
+	 * efficient.
+	 */
+	rcu_read_lock();
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	hash = rht_key_hashfn(ht, tbl, fhp, nfs4_file_rhash_params);
+	bkt = rht_bucket(tbl, hash);
+	rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
+		struct nfs4_file *fi;
+
+		fi = rht_obj(ht, he);
+		if (nfs4_file_obj_cmpfn(&arg, fi) == 0)
+			continue;
+		if (d_inode(fhp->fh_dentry) == fi->fi_inode) {
+			fi->fi_aliased = true;
+			new->fi_aliased = true;
+		}
 	}
-	spin_unlock(&state_lock);
-	return ret;
+	rcu_read_unlock();
 }
 
-static struct nfs4_file * find_file(struct svc_fh *fh)
+static noinline struct nfs4_file *
+find_or_hash_nfs4_file(struct nfs4_file *new, const struct svc_fh *fhp)
 {
-	struct nfs4_file *fp;
-	unsigned int hashval = file_hashval(fh);
+	struct nfs4_file *fi;
 
-	rcu_read_lock();
-	fp = find_file_locked(fh, hashval);
-	rcu_read_unlock();
-	return fp;
-}
+	init_nfs4_file(fhp, new);
 
-static struct nfs4_file *
-find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
-{
-	struct nfs4_file *fp;
-	unsigned int hashval = file_hashval(fh);
+	fi = rhashtable_lookup_get_insert_key(&nfs4_file_rhashtbl,
					      fhp, &new->fi_rhash,
+					      nfs4_file_rhash_params);
+	if (!fi) {
+		fi = new;
+		goto check_aliases;
+	}
+	if (IS_ERR(fi)) /* or BUG? */
+		return NULL;
+	if (!refcount_inc_not_zero(&fi->fi_ref))
+		fi = new;
 
-	rcu_read_lock();
-	fp = find_file_locked(fh, hashval);
-	rcu_read_unlock();
-	if (fp)
-		return fp;
+check_aliases:
+	check_nfs4_file_aliases_locked(fi, fhp);
+
+	return fi;
+}
 
-	return insert_file(new, fh, hashval);
+static void unhash_nfs4_file(struct nfs4_file *fi)
+{
+	rhashtable_remove_fast(&nfs4_file_rhashtbl, &fi->fi_rhash,
+			       nfs4_file_rhash_params);
 }
 
 /*
@@ -4703,9 +4778,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	struct nfs4_file *fp;
 	__be32 ret = nfs_ok;
 
-	fp = find_file(current_fh);
+	fp = find_nfs4_file(current_fh);
 	if (!fp)
 		return ret;
+
 	/* Check for conflicting share reservations */
 	spin_lock(&fp->fi_lock);
 	if (fp->fi_share_deny & deny_type)
@@ -5548,7 +5624,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 * and check for delegations in the process of being recalled.
 	 * If not found, create the nfs4_file struct
 	 */
-	fp = find_or_add_file(open->op_file, current_fh);
+	fp = find_or_hash_nfs4_file(open->op_file, current_fh);
+	if (unlikely(!fp))
+		return nfserr_jukebox;
 	if (fp != open->op_file) {
 		status = nfs4_check_deleg(cl, open, &dp);
 		if (status)
@@ -7905,10 +7983,16 @@ nfs4_state_start(void)
 {
 	int ret;
 
-	ret = nfsd4_create_callback_queue();
+	ret = rhashtable_init(&nfs4_file_rhashtbl, &nfs4_file_rhash_params);
 	if (ret)
 		return ret;
 
+	ret = nfsd4_create_callback_queue();
+	if (ret) {
+		rhashtable_destroy(&nfs4_file_rhashtbl);
+		return ret;
+	}
+
 	set_max_delegations();
 	return 0;
 }
@@ -7939,6 +8023,7 @@ nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
+	rhashtable_destroy(&nfs4_file_rhashtbl);
 #ifdef CONFIG_NFSD_V4_2_INTER_SSC
 	nfsd4_ssc_shutdown_umount(nn);
 #endif
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ae596dbf8667..879f085bc39e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -536,16 +536,13 @@ struct nfs4_clnt_odstate {
  * inode can have multiple filehandles associated with it, so there is
  * (potentially) a many to one relationship between this struct and struct
  * inode.
- *
- * These are hashed by filehandle in the file_hashtbl, which is protected by
- * the global state_lock spinlock.
  */
 struct nfs4_file {
 	refcount_t		fi_ref;
 	struct inode		*fi_inode;
 	bool			fi_aliased;
 	spinlock_t		fi_lock;
-	struct hlist_node	fi_hash;	/* hash on fi_fhandle */
+	struct rhash_head	fi_rhash;
 	struct list_head	fi_stateids;
 	union {
 		struct list_head	fi_delegations;
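Finally, a small standalone illustration of the "middle bits" comment
in nfs4_file_inode_hash(): slab objects are cache-line aligned, so the
low bits of an inode address are always zero, while the high bits are
shared by most kernel slab allocations. The sketch below shows how the
shift-and-mask isolates the 24 bits that actually differ between
objects before jhash2() spreads them. It is userspace C for easy
experimentation; L1_CACHE_SHIFT = 6 and the sample addresses are
assumptions, not values from the patch:

	#include <stdio.h>
	#include <stdint.h>

	#define L1_CACHE_SHIFT 6	/* typical x86-64 value; assumed */

	/* Same reduction as nfs4_file_inode_hash(), minus jhash2() */
	static uint32_t middle_bits(uintptr_t ptr)
	{
		return (uint32_t)(ptr >> L1_CACHE_SHIFT) & 0x00ffffff;
	}

	int main(void)
	{
		/* two plausible 64-bit slab addresses, 256 bytes apart */
		uintptr_t a = 0xffff8881052d4c00UL;
		uintptr_t b = 0xffff8881052d4d00UL;

		/* low bits (all zero) and high bits (identical) drop
		 * out; the extracted middle bits differ */
		printf("%06x %06x\n",
		       (unsigned)middle_bits(a), (unsigned)middle_bits(b));
		return 0;
	}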