diff mbox series

[v3,17/32] NFSD: Never call nfsd_file_gc() in foreground paths

Message ID 165730473096.28142.6742811495100296997.stgit@klimt.1015granger.net (mailing list archive)
State Not Applicable
Headers show
Series Overhaul NFSD filecache | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Chuck Lever July 8, 2022, 6:25 p.m. UTC
The checks in nfsd_file_acquire() and nfsd_file_put() that directly
invoke filecache garbage collection are intended to keep cache
occupancy between a low- and high-watermark. The reason to limit the
capacity of the filecache is to keep filecache lookups reasonably
fast.

However, invoking garbage collection at those points has some
undesirable negative impacts. Files that are held open by NFSv4
clients often push the occupancy of the filecache over these
watermarks. At that point:

- Every call to nfsd_file_acquire() and nfsd_file_put() results in
  an LRU walk. This has the same effect on lookup latency as long
  chains in the hash table.
- Garbage collection will then run on every nfsd thread, causing a
  lot of unnecessary lock contention.
- Limiting cache capacity pushes out files used only by NFSv3
  clients, which are the type of files the filecache is supposed to
  help.

To address those negative impacts, remove the direct calls to the
garbage collector. Subsequent patches will address maintaining
lookup efficiency as cache capacity increases.

Suggested-by: Wang Yugui <wangyugui@e16-tech.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/filecache.c |   10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

Comments

Jeff Layton July 8, 2022, 7:43 p.m. UTC | #1
On Fri, 2022-07-08 at 14:25 -0400, Chuck Lever wrote:
> The checks in nfsd_file_acquire() and nfsd_file_put() that directly
> invoke filecache garbage collection are intended to keep cache
> occupancy between a low- and high-watermark. The reason to limit the
> capacity of the filecache is to keep filecache lookups reasonably
> fast.
> 
> However, invoking garbage collection at those points has some
> undesirable negative impacts. Files that are held open by NFSv4
> clients often push the occupancy of the filecache over these
> watermarks. At that point:
> 
> - Every call to nfsd_file_acquire() and nfsd_file_put() results in
>   an LRU walk. This has the same effect on lookup latency as long
>   chains in the hash table.
> - Garbage collection will then run on every nfsd thread, causing a
>   lot of unnecessary lock contention.
> - Limiting cache capacity pushes out files used only by NFSv3
>   clients, which are the type of files the filecache is supposed to
>   help.
> 
> To address those negative impacts, remove the direct calls to the
> garbage collector. Subsequent patches will address maintaining
> lookup efficiency as cache capacity increases.
> 
> Suggested-by: Wang Yugui <wangyugui@e16-tech.com>
> Suggested-by: Dave Chinner <david@fromorbit.com>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/filecache.c |   10 +---------
>  1 file changed, 1 insertion(+), 9 deletions(-)
> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index bd6ba63f69ae..faa8588663d6 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -29,8 +29,6 @@
>  #define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)
>  
>  #define NFSD_FILE_SHUTDOWN		     (1)
> -#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
> -#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)
>  
>  /* We only care about NFSD_MAY_READ/WRITE for this cache */
>  #define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
> @@ -66,8 +64,6 @@ static struct fsnotify_group		*nfsd_file_fsnotify_group;
>  static atomic_long_t			nfsd_filecache_count;
>  static struct delayed_work		nfsd_filecache_laundrette;
>  
> -static void nfsd_file_gc(void);
> -
>  static void
>  nfsd_file_schedule_laundrette(void)
>  {
> @@ -350,9 +346,6 @@ nfsd_file_put(struct nfsd_file *nf)
>  		nfsd_file_schedule_laundrette();
>  	} else
>  		nfsd_file_put_noref(nf);
> -
> -	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
> -		nfsd_file_gc();

This may be addressed in later patches, but instead of just removing
these, would it be better to instead call
nfsd_file_schedule_laundrette() ?

>  }
>  
>  struct nfsd_file *
> @@ -1075,8 +1068,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
>  			nfsd_file_hashtbl[hashval].nfb_count);
>  	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
> -	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
> -		nfsd_file_gc();
> +	atomic_long_inc(&nfsd_filecache_count);
>  
>  	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
>  	if (nf->nf_mark) {
> 
>
Chuck Lever July 8, 2022, 7:45 p.m. UTC | #2
> On Jul 8, 2022, at 3:43 PM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Fri, 2022-07-08 at 14:25 -0400, Chuck Lever wrote:
>> The checks in nfsd_file_acquire() and nfsd_file_put() that directly
>> invoke filecache garbage collection are intended to keep cache
>> occupancy between a low- and high-watermark. The reason to limit the
>> capacity of the filecache is to keep filecache lookups reasonably
>> fast.
>> 
>> However, invoking garbage collection at those points has some
>> undesirable negative impacts. Files that are held open by NFSv4
>> clients often push the occupancy of the filecache over these
>> watermarks. At that point:
>> 
>> - Every call to nfsd_file_acquire() and nfsd_file_put() results in
>> an LRU walk. This has the same effect on lookup latency as long
>> chains in the hash table.
>> - Garbage collection will then run on every nfsd thread, causing a
>> lot of unnecessary lock contention.
>> - Limiting cache capacity pushes out files used only by NFSv3
>> clients, which are the type of files the filecache is supposed to
>> help.
>> 
>> To address those negative impacts, remove the direct calls to the
>> garbage collector. Subsequent patches will address maintaining
>> lookup efficiency as cache capacity increases.
>> 
>> Suggested-by: Wang Yugui <wangyugui@e16-tech.com>
>> Suggested-by: Dave Chinner <david@fromorbit.com>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/filecache.c | 10 +---------
>> 1 file changed, 1 insertion(+), 9 deletions(-)
>> 
>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>> index bd6ba63f69ae..faa8588663d6 100644
>> --- a/fs/nfsd/filecache.c
>> +++ b/fs/nfsd/filecache.c
>> @@ -29,8 +29,6 @@
>> #define NFSD_LAUNDRETTE_DELAY		 (2 * HZ)
>> 
>> #define NFSD_FILE_SHUTDOWN		 (1)
>> -#define NFSD_FILE_LRU_THRESHOLD		 (4096UL)
>> -#define NFSD_FILE_LRU_LIMIT		 (NFSD_FILE_LRU_THRESHOLD << 2)
>> 
>> /* We only care about NFSD_MAY_READ/WRITE for this cache */
>> #define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
>> @@ -66,8 +64,6 @@ static struct fsnotify_group		*nfsd_file_fsnotify_group;
>> static atomic_long_t			nfsd_filecache_count;
>> static struct delayed_work		nfsd_filecache_laundrette;
>> 
>> -static void nfsd_file_gc(void);
>> -
>> static void
>> nfsd_file_schedule_laundrette(void)
>> {
>> @@ -350,9 +346,6 @@ nfsd_file_put(struct nfsd_file *nf)
>> 		nfsd_file_schedule_laundrette();
>> 	} else
>> 		nfsd_file_put_noref(nf);
>> -
>> -	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
>> -		nfsd_file_gc();
> 
> This may be addressed in later patches, but instead of just removing
> these, would it be better to instead call
> nfsd_file_schedule_laundrette() ?

nfsd_file_put() already kicks the laundrette.

I can't see a reason to call the laundrette again; once there are items
on the LRU it seems to run every 2 seconds anyway.


>> }
>> 
>> struct nfsd_file *
>> @@ -1075,8 +1068,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
>> 			nfsd_file_hashtbl[hashval].nfb_count);
>> 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
>> -	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
>> -		nfsd_file_gc();
>> +	atomic_long_inc(&nfsd_filecache_count);
>> 
>> 	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
>> 	if (nf->nf_mark) {
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@redhat.com>

--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index bd6ba63f69ae..faa8588663d6 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -29,8 +29,6 @@ 
 #define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)
 
 #define NFSD_FILE_SHUTDOWN		     (1)
-#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
-#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)
 
 /* We only care about NFSD_MAY_READ/WRITE for this cache */
 #define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
@@ -66,8 +64,6 @@  static struct fsnotify_group		*nfsd_file_fsnotify_group;
 static atomic_long_t			nfsd_filecache_count;
 static struct delayed_work		nfsd_filecache_laundrette;
 
-static void nfsd_file_gc(void);
-
 static void
 nfsd_file_schedule_laundrette(void)
 {
@@ -350,9 +346,6 @@  nfsd_file_put(struct nfsd_file *nf)
 		nfsd_file_schedule_laundrette();
 	} else
 		nfsd_file_put_noref(nf);
-
-	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
-		nfsd_file_gc();
 }
 
 struct nfsd_file *
@@ -1075,8 +1068,7 @@  nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
 			nfsd_file_hashtbl[hashval].nfb_count);
 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
-	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
-		nfsd_file_gc();
+	atomic_long_inc(&nfsd_filecache_count);
 
 	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
 	if (nf->nf_mark) {