@@ -62,7 +62,6 @@ static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
static struct list_lru nfsd_file_lru;
static unsigned long nfsd_file_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
-static atomic_long_t nfsd_filecache_count;
static atomic_long_t nfsd_file_total_age;
static struct delayed_work nfsd_filecache_laundrette;
@@ -170,7 +169,7 @@ static const struct rhashtable_params nfsd_file_rhash_params = {
static void
nfsd_file_schedule_laundrette(void)
{
- if ((atomic_long_read(&nfsd_filecache_count) == 0) ||
+ if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
return;
@@ -282,9 +281,10 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net)
nf->nf_file = NULL;
nf->nf_cred = get_current_cred();
nf->nf_net = net;
- nf->nf_flags = 0;
+ nf->nf_flags = BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
nf->nf_inode = inode;
- refcount_set(&nf->nf_ref, 1);
+ /* nf_ref is pre-incremented for hash table */
+ refcount_set(&nf->nf_ref, 2);
nf->nf_may = may & NFSD_FILE_MAY_MASK;
if (may & NFSD_MAY_NOT_BREAK_LEASE) {
if (may & NFSD_MAY_WRITE)
@@ -377,40 +377,21 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf)
}
static void
-nfsd_file_do_unhash(struct nfsd_file *nf)
+nfsd_file_hash_remove(struct nfsd_file *nf)
{
- struct inode *inode = nf->nf_inode;
- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
- NFSD_FILE_HASH_BITS);
-
- lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock);
-
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
- --nfsd_file_hashtbl[hashval].nfb_count;
- hlist_del_rcu(&nf->nf_node);
- atomic_long_dec(&nfsd_filecache_count);
-}
-
-static void
-nfsd_file_hash_remove(struct nfsd_file *nf)
-{
- struct inode *inode = nf->nf_inode;
- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
- NFSD_FILE_HASH_BITS);
-
- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
- nfsd_file_do_unhash(nf);
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+ nfsd_file_rhash_params);
}
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- nfsd_file_do_unhash(nf);
+ nfsd_file_hash_remove(nf);
return true;
}
return false;
@@ -420,9 +401,9 @@ nfsd_file_unhash(struct nfsd_file *nf)
* Return true if the file was unhashed.
*/
static bool
-nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
{
- trace_nfsd_file_unhash_and_release_locked(nf);
+ trace_nfsd_file_unhash_and_dispose(nf);
if (!nfsd_file_unhash(nf))
return false;
/* keep final reference for nfsd_file_lru_dispose */
@@ -683,17 +664,21 @@ static struct shrinker nfsd_file_shrinker = {
static void
__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
{
- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
- NFSD_FILE_HASH_BITS);
- struct nfsd_file *nf;
- struct hlist_node *tmp;
-
- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
- hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
- if (inode == nf->nf_inode)
- nfsd_file_unhash_and_release_locked(nf, dispose);
- }
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
+ };
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+ do {
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+ if (!nf)
+ break;
+ nfsd_file_unhash_and_dispose(nf, dispose);
+ } while (1);
+ rcu_read_unlock();
}
/**
@@ -895,30 +880,39 @@ nfsd_file_cache_init(void)
static void
__nfsd_file_cache_purge(struct net *net)
{
- unsigned int i;
- struct nfsd_file *nf;
- struct hlist_node *next;
+ struct rhashtable_iter iter;
+ struct nfsd_file *nf;
LIST_HEAD(dispose);
bool del;
- for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
- struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+ return;
+
+ rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
+ do {
+ rhashtable_walk_start(&iter);
- spin_lock(&nfb->nfb_lock);
- hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
if (net && nf->nf_net != net)
continue;
- del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+ del = nfsd_file_unhash_and_dispose(nf, &dispose);
/*
* Deadlock detected! Something marked this entry as
* unhased, but hasn't removed it from the hash list.
*/
WARN_ON_ONCE(!del);
+
+ nf = rhashtable_walk_next(&iter);
}
- spin_unlock(&nfb->nfb_lock);
- nfsd_file_dispose_list(&dispose);
- }
+
+ rhashtable_walk_stop(&iter);
+ } while (nf == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&iter);
+
+ nfsd_file_dispose_list(&dispose);
}
static struct nfsd_fcache_disposal *
@@ -1025,55 +1019,73 @@ nfsd_file_cache_shutdown(void)
}
static struct nfsd_file *
-nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
- unsigned int hashval, struct net *net)
+nfsd_file_find(struct inode *inode, unsigned int may_flags, struct net *net)
{
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_FULL,
+ .inode = inode,
+ .need = may_flags & NFSD_FILE_MAY_MASK,
+ .net = net,
+ .cred = current_cred(),
+ };
struct nfsd_file *nf;
- unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
- hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
- nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
- if (nf->nf_may != need)
- continue;
- if (nf->nf_inode != inode)
- continue;
- if (nf->nf_net != net)
- continue;
- if (!nfsd_match_cred(nf->nf_cred, current_cred()))
- continue;
- if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
- continue;
- if (nfsd_file_get(nf) != NULL)
- return nf;
- }
- return NULL;
+ rcu_read_lock();
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+ if (nf)
+ nf = nfsd_file_get(nf);
+ rcu_read_unlock();
+ return nf;
+}
+
+/*
+ * Atomically insert a new nfsd_file item into nfsd_file_rhash_tbl.
+ *
+ * Return values:
+ * %NULL: @new was inserted successfully
+ * %A valid pointer: @new was not inserted, a matching item is returned
+ * %ERR_PTR: an unexpected error occurred during insertion
+ */
+static struct nfsd_file *nfsd_file_insert(struct nfsd_file *new)
+{
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_FULL,
+ .inode = new->nf_inode,
+ .need = new->nf_flags,
+ .net = new->nf_net,
+ .cred = current_cred(),
+ };
+ struct nfsd_file *nf;
+
+ nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
+ &key, &new->nf_rhash,
+ nfsd_file_rhash_params);
+ if (!nf)
+ return nf;
+ return nfsd_file_get(nf);
}
/**
- * nfsd_file_is_cached - are there any cached open files for this fh?
- * @inode: inode of the file to check
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode to check
*
- * Scan the hashtable for open files that match this fh. Returns true if there
- * are any, and false if not.
+ * Return values:
+ * %true: filecache contains at least one file matching this inode
+ * %false: filecache contains no files matching this inode
*/
bool
nfsd_file_is_cached(struct inode *inode)
{
- bool ret = false;
- struct nfsd_file *nf;
- unsigned int hashval;
-
- hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
- nf_node) {
- if (inode == nf->nf_inode) {
- ret = true;
- break;
- }
- }
- rcu_read_unlock();
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
+ };
+ bool ret = false;
+
+ if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params) != NULL)
+ ret = true;
trace_nfsd_file_is_cached(inode, (int)ret);
return ret;
}
@@ -1086,7 +1098,6 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct net *net = SVC_NET(rqstp);
struct nfsd_file *nf, *new;
struct inode *inode;
- unsigned int hashval;
bool retry = true;
status = fh_verify(rqstp, fhp, S_IFREG,
@@ -1095,12 +1106,9 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
return status;
inode = d_inode(fhp->fh_dentry);
- hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
/* Avoid allocation if the item is already in cache */
- rcu_read_lock();
- nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
- rcu_read_unlock();
+ nf = nfsd_file_find(inode, may_flags, net);
if (nf == NULL) {
new = nfsd_file_alloc(inode, may_flags, net);
if (!new) {
@@ -1108,18 +1116,20 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_status;
}
- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
- nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
- if (nf == NULL)
+ nf = nfsd_file_insert(new);
+ if (nf == NULL) {
+ nf = new;
goto open_file;
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ }
nfsd_file_slab_free(&new->nf_rcu);
+ if (IS_ERR(nf)) {
+ status = nfserr_jukebox;
+ goto out_status;
+ }
}
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
-
- /* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
this_cpu_inc(nfsd_file_cons_fails);
if (!retry) {
@@ -1128,6 +1138,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
retry = false;
nfsd_file_put_noref(nf);
+ cond_resched();
goto retry;
}
@@ -1164,18 +1175,6 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
return status;
open_file:
- nf = new;
- /* Take reference for the hashtable */
- refcount_inc(&nf->nf_ref);
- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
- hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
- ++nfsd_file_hashtbl[hashval].nfb_count;
- nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
- nfsd_file_hashtbl[hashval].nfb_count);
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
- atomic_long_inc(&nfsd_filecache_count);
-
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark) {
if (open) {
@@ -1190,15 +1189,9 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
* If construction failed, or we raced with a call to unlink()
* then unhash.
*/
- if (status != nfs_ok || inode->i_nlink == 0) {
- bool do_free;
- nfsd_file_lru_remove(nf);
- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
- do_free = nfsd_file_unhash(nf);
- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
- if (do_free)
+ if (status != nfs_ok || inode->i_nlink == 0)
+ if (nfsd_file_unhash(nf))
nfsd_file_put_noref(nf);
- }
clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
smp_mb__after_atomic();
wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
@@ -1248,21 +1241,15 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
unsigned long releases = 0, pages_flushed = 0, evictions = 0;
unsigned long hits = 0, acquisitions = 0, cons_fails = 0;
- unsigned int i, count = 0, longest = 0;
+ struct bucket_table *tbl;
+ int i;
- /*
- * No need for spinlocks here since we're not terribly interested in
- * accuracy. We do take the nfsd_mutex simply to ensure that we
- * don't end up racing with server shutdown
- */
+ /* Serialize with server shutdown */
mutex_lock(&nfsd_mutex);
- if (nfsd_file_hashtbl) {
- for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
- count += nfsd_file_hashtbl[i].nfb_count;
- longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
- }
- }
- mutex_unlock(&nfsd_mutex);
+
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(nfsd_file_rhash_tbl.tbl, &nfsd_file_rhash_tbl);
+ rcu_read_unlock();
for_each_possible_cpu(i) {
hits += per_cpu(nfsd_file_cache_hits, i);
@@ -1273,8 +1260,8 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
cons_fails += per_cpu(nfsd_file_cons_fails, i);
}
- seq_printf(m, "total entries: %u\n", count);
- seq_printf(m, "longest chain: %u\n", longest);
+ seq_printf(m, "total entries: %d\n", atomic_read(&nfsd_file_rhash_tbl.nelems));
+ seq_printf(m, "hash buckets: %u\n", tbl->size);
seq_printf(m, "lru entries: %lu\n", list_lru_count(&nfsd_file_lru));
seq_printf(m, "cache hits: %lu\n", hits);
seq_printf(m, "acquisitions: %lu\n", acquisitions);
@@ -1287,6 +1274,8 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "mean age (ms): -\n");
seq_printf(m, "pages flushed: %lu\n", pages_flushed);
seq_printf(m, "cons fails: %lu\n", cons_fails);
+
+ mutex_unlock(&nfsd_mutex);
return 0;
}
@@ -734,7 +734,7 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
-DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
TRACE_EVENT(nfsd_file_acquire,
TP_PROTO(
Enable the filecache hash table to start small, then grow with the workload. Smaller server deployments benefit because there should be lower memory utilization. Larger server deployments should see improved scaling with the number of open files. I know this is a big and messy patch, but there's no good way to rip out and replace a data structure like this. Suggested-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> --- fs/nfsd/filecache.c | 259 ++++++++++++++++++++++++--------------------------- fs/nfsd/trace.h | 2 2 files changed, 125 insertions(+), 136 deletions(-)