diff mbox series

[v7,08/21] NFS: Adjust the amount of readahead performed by NFS readdir

Message ID 20220223211305.296816-9-trondmy@kernel.org (mailing list archive)
State New, archived
Headers show
Series Readdir improvements | expand

Commit Message

Trond Myklebust Feb. 23, 2022, 9:12 p.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

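This patch therefore tries to tone down the amount of readahead we
perform, and adjust it to try to match the amount of data being
requested by user space. The readdir buffer size (dtsize) is now
tracked per open directory context: it starts at PAGE_SIZE, is doubled
when we have to go back to the server for more pages, and is halved in
the uncached readdir path when we're reading too many pages. It is
always clamped between NFS_MIN_FILE_IO_SIZE and the server's dtsize.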

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 55 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs.h |  1 +
 2 files changed, 55 insertions(+), 1 deletion(-)

Comments

Anna Schumaker Feb. 24, 2022, 4:30 p.m. UTC | #1
Hi Trond,

On Wed, Feb 23, 2022 at 8:11 PM <trondmy@kernel.org> wrote:
>
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> The current NFS readdir code will always try to maximise the amount of
> readahead it performs on the assumption that we can cache anything that
> isn't immediately read by the process.
> There are several cases where this assumption breaks down, including
> when the 'ls -l' heuristic kicks in to try to force use of readdirplus
> as a batch replacement for lookup/getattr.
>
> This patch therefore tries to tone down the amount of readahead we
> perform, and adjust it to try to match the amount of data being
> requested by user space.

I'm seeing the cthon basic tests fail at this patch, but I can't tell
whether the failure starts here or back in patch 6 or 7, because of the
earlier compile error. The other cthon tests still pass, however:

Thu Feb 24 11:27:44 EST 2022
./server -b -o tcp,v3,sec=sys -m /mnt/nfsv3tcp -p /srv/test/anna/nfsv3tcp server
./server -b -o proto=tcp,sec=sys,v4.0 -m /mnt/nfsv4tcp -p
/srv/test/anna/nfsv4tcp server
./server -b -o proto=tcp,sec=sys,v4.1 -m /mnt/nfsv41tcp -p
/srv/test/anna/nfsv41tcp server
./server -b -o proto=tcp,sec=sys,v4.2 -m /mnt/nfsv42tcp -p
/srv/test/anna/nfsv42tcp server
Waiting for 'b' to finish...
The '-b' test using '-o tcp,v3,sec=sys' args to server: Failed!!
The '-b' test using '-o proto=tcp,sec=sys,v4.0' args to server: Failed!!
The '-b' test using '-o proto=tcp,sec=sys,v4.2' args to server: Failed!!
The '-b' test using '-o proto=tcp,sec=sys,v4.1' args to server: Failed!!
 Done: 11:27:46

Anna

>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/dir.c           | 55 +++++++++++++++++++++++++++++++++++++++++-
>  include/linux/nfs_fs.h |  1 +
>  2 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 70c0db877815..83933b7018ea 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -69,6 +69,8 @@ const struct address_space_operations nfs_dir_aops = {
>         .freepage = nfs_readdir_clear_array,
>  };
>
> +#define NFS_INIT_DTSIZE PAGE_SIZE
> +
>  static struct nfs_open_dir_context *
>  alloc_nfs_open_dir_context(struct inode *dir)
>  {
> @@ -78,6 +80,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
>         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
>         if (ctx != NULL) {
>                 ctx->attr_gencount = nfsi->attr_gencount;
> +               ctx->dtsize = NFS_INIT_DTSIZE;
>                 spin_lock(&dir->i_lock);
>                 if (list_empty(&nfsi->open_files) &&
>                     (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
> @@ -153,6 +156,7 @@ struct nfs_readdir_descriptor {
>         struct page     *page;
>         struct dir_context *ctx;
>         pgoff_t         page_index;
> +       pgoff_t         page_index_max;
>         u64             dir_cookie;
>         u64             last_cookie;
>         u64             dup_cookie;
> @@ -165,12 +169,36 @@ struct nfs_readdir_descriptor {
>         unsigned long   gencount;
>         unsigned long   attr_gencount;
>         unsigned int    cache_entry_index;
> +       unsigned int    buffer_fills;
> +       unsigned int    dtsize;
>         signed char duped;
>         bool plus;
>         bool eob;
>         bool eof;
>  };
>
> +static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
> +{
> +       struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
> +       unsigned int maxsize = server->dtsize;
> +
> +       if (sz > maxsize)
> +               sz = maxsize;
> +       if (sz < NFS_MIN_FILE_IO_SIZE)
> +               sz = NFS_MIN_FILE_IO_SIZE;
> +       desc->dtsize = sz;
> +}
> +
> +static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
> +{
> +       nfs_set_dtsize(desc, desc->dtsize >> 1);
> +}
> +
> +static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
> +{
> +       nfs_set_dtsize(desc, desc->dtsize << 1);
> +}
> +
>  static void nfs_readdir_array_init(struct nfs_cache_array *array)
>  {
>         memset(array, 0, sizeof(struct nfs_cache_array));
> @@ -774,6 +802,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
>                                 break;
>                         arrays++;
>                         *arrays = page = new;
> +                       desc->page_index_max++;
>                 } else {
>                         new = nfs_readdir_page_get_next(mapping,
>                                                         page->index + 1,
> @@ -783,6 +812,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
>                         if (page != *arrays)
>                                 nfs_readdir_page_unlock_and_put(page);
>                         page = new;
> +                       desc->page_index_max = new->index;
>                 }
>                 status = nfs_readdir_add_to_array(entry, page);
>         } while (!status && !entry->eof);
> @@ -848,7 +878,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
>         struct nfs_entry *entry;
>         size_t array_size;
>         struct inode *inode = file_inode(desc->file);
> -       size_t dtsize = NFS_SERVER(inode)->dtsize;
> +       unsigned int dtsize = desc->dtsize;
>         int status = -ENOMEM;
>
>         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> @@ -884,6 +914,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
>
>                 status = nfs_readdir_page_filler(desc, entry, pages, pglen,
>                                                  arrays, narrays);
> +               desc->buffer_fills++;
>         } while (!status && nfs_readdir_page_needs_filling(page) &&
>                 page_mapping(page));
>
> @@ -931,6 +962,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
>         if (!desc->page)
>                 return -ENOMEM;
>         if (nfs_readdir_page_needs_filling(desc->page)) {
> +               desc->page_index_max = desc->page_index;
>                 res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
>                                                &desc->page, 1);
>                 if (res < 0) {
> @@ -1067,6 +1099,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>         desc->cache_entry_index = 0;
>         desc->last_cookie = desc->dir_cookie;
>         desc->duped = 0;
> +       desc->page_index_max = 0;
>
>         status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
>
> @@ -1076,10 +1109,22 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
>         }
>         desc->page = NULL;
>
> +       /*
> +        * Grow the dtsize if we have to go back for more pages,
> +        * or shrink it if we're reading too many.
> +        */
> +       if (!desc->eof) {
> +               if (!desc->eob)
> +                       nfs_grow_dtsize(desc);
> +               else if (desc->buffer_fills == 1 &&
> +                        i < (desc->page_index_max >> 1))
> +                       nfs_shrink_dtsize(desc);
> +       }
>
>         for (i = 0; i < sz && arrays[i]; i++)
>                 nfs_readdir_page_array_free(arrays[i]);
>  out:
> +       desc->page_index_max = -1;
>         kfree(arrays);
>         dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
>         return status;
> @@ -1118,6 +1163,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>         desc->file = file;
>         desc->ctx = ctx;
>         desc->plus = nfs_use_readdirplus(inode, ctx);
> +       desc->page_index_max = -1;
>
>         spin_lock(&file->f_lock);
>         desc->dir_cookie = dir_ctx->dir_cookie;
> @@ -1128,6 +1174,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>         desc->last_cookie = dir_ctx->last_cookie;
>         desc->attr_gencount = dir_ctx->attr_gencount;
>         desc->eof = dir_ctx->eof;
> +       nfs_set_dtsize(desc, dir_ctx->dtsize);
>         memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
>         spin_unlock(&file->f_lock);
>
> @@ -1169,6 +1216,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>
>                 nfs_do_filldir(desc, nfsi->cookieverf);
>                 nfs_readdir_page_unlock_and_put_cached(desc);
> +               if (desc->eob || desc->eof)
> +                       break;
> +               /* Grow the dtsize if we have to go back for more pages */
> +               if (desc->page_index == desc->page_index_max)
> +                       nfs_grow_dtsize(desc);
>         } while (!desc->eob && !desc->eof);
>
>         spin_lock(&file->f_lock);
> @@ -1179,6 +1231,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
>         dir_ctx->attr_gencount = desc->attr_gencount;
>         dir_ctx->page_index = desc->page_index;
>         dir_ctx->eof = desc->eof;
> +       dir_ctx->dtsize = desc->dtsize;
>         memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
>         spin_unlock(&file->f_lock);
>  out_free:
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index 1c533f2c1f36..691a27936849 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -107,6 +107,7 @@ struct nfs_open_dir_context {
>         __u64 dup_cookie;
>         __u64 last_cookie;
>         pgoff_t page_index;
> +       unsigned int dtsize;
>         signed char duped;
>         bool eof;
>  };
> --
> 2.35.1
>
diff mbox series

Patch

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 70c0db877815..83933b7018ea 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,6 +69,8 @@  const struct address_space_operations nfs_dir_aops = {
 	.freepage = nfs_readdir_clear_array,
 };
 
+#define NFS_INIT_DTSIZE PAGE_SIZE
+
 static struct nfs_open_dir_context *
 alloc_nfs_open_dir_context(struct inode *dir)
 {
@@ -78,6 +80,7 @@  alloc_nfs_open_dir_context(struct inode *dir)
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 	if (ctx != NULL) {
 		ctx->attr_gencount = nfsi->attr_gencount;
+		ctx->dtsize = NFS_INIT_DTSIZE;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -153,6 +156,7 @@  struct nfs_readdir_descriptor {
 	struct page	*page;
 	struct dir_context *ctx;
 	pgoff_t		page_index;
+	pgoff_t		page_index_max;
 	u64		dir_cookie;
 	u64		last_cookie;
 	u64		dup_cookie;
@@ -165,12 +169,36 @@  struct nfs_readdir_descriptor {
 	unsigned long	gencount;
 	unsigned long	attr_gencount;
 	unsigned int	cache_entry_index;
+	unsigned int	buffer_fills;
+	unsigned int	dtsize;
 	signed char duped;
 	bool plus;
 	bool eob;
 	bool eof;
 };
 
+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+	unsigned int maxsize = server->dtsize;
+
+	if (sz > maxsize)
+		sz = maxsize;
+	if (sz < NFS_MIN_FILE_IO_SIZE)
+		sz = NFS_MIN_FILE_IO_SIZE;
+	desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize << 1);
+}
+
 static void nfs_readdir_array_init(struct nfs_cache_array *array)
 {
 	memset(array, 0, sizeof(struct nfs_cache_array));
@@ -774,6 +802,7 @@  static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 				break;
 			arrays++;
 			*arrays = page = new;
+			desc->page_index_max++;
 		} else {
 			new = nfs_readdir_page_get_next(mapping,
 							page->index + 1,
@@ -783,6 +812,7 @@  static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 			if (page != *arrays)
 				nfs_readdir_page_unlock_and_put(page);
 			page = new;
+			desc->page_index_max = new->index;
 		}
 		status = nfs_readdir_add_to_array(entry, page);
 	} while (!status && !entry->eof);
@@ -848,7 +878,7 @@  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	struct nfs_entry *entry;
 	size_t array_size;
 	struct inode *inode = file_inode(desc->file);
-	size_t dtsize = NFS_SERVER(inode)->dtsize;
+	unsigned int dtsize = desc->dtsize;
 	int status = -ENOMEM;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -884,6 +914,7 @@  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
+		desc->buffer_fills++;
 	} while (!status && nfs_readdir_page_needs_filling(page) &&
 		page_mapping(page));
 
@@ -931,6 +962,7 @@  static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
+		desc->page_index_max = desc->page_index;
 		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
 					       &desc->page, 1);
 		if (res < 0) {
@@ -1067,6 +1099,7 @@  static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	desc->cache_entry_index = 0;
 	desc->last_cookie = desc->dir_cookie;
 	desc->duped = 0;
+	desc->page_index_max = 0;
 
 	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
 
@@ -1076,10 +1109,22 @@  static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	}
 	desc->page = NULL;
 
+	/*
+	 * Grow the dtsize if we have to go back for more pages,
+	 * or shrink it if we're reading too many.
+	 */
+	if (!desc->eof) {
+		if (!desc->eob)
+			nfs_grow_dtsize(desc);
+		else if (desc->buffer_fills == 1 &&
+			 i < (desc->page_index_max >> 1))
+			nfs_shrink_dtsize(desc);
+	}
 
 	for (i = 0; i < sz && arrays[i]; i++)
 		nfs_readdir_page_array_free(arrays[i]);
 out:
+	desc->page_index_max = -1;
 	kfree(arrays);
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
 	return status;
@@ -1118,6 +1163,7 @@  static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->file = file;
 	desc->ctx = ctx;
 	desc->plus = nfs_use_readdirplus(inode, ctx);
+	desc->page_index_max = -1;
 
 	spin_lock(&file->f_lock);
 	desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1128,6 +1174,7 @@  static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->last_cookie = dir_ctx->last_cookie;
 	desc->attr_gencount = dir_ctx->attr_gencount;
 	desc->eof = dir_ctx->eof;
+	nfs_set_dtsize(desc, dir_ctx->dtsize);
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
@@ -1169,6 +1216,11 @@  static int nfs_readdir(struct file *file, struct dir_context *ctx)
 
 		nfs_do_filldir(desc, nfsi->cookieverf);
 		nfs_readdir_page_unlock_and_put_cached(desc);
+		if (desc->eob || desc->eof)
+			break;
+		/* Grow the dtsize if we have to go back for more pages */
+		if (desc->page_index == desc->page_index_max)
+			nfs_grow_dtsize(desc);
 	} while (!desc->eob && !desc->eof);
 
 	spin_lock(&file->f_lock);
@@ -1179,6 +1231,7 @@  static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	dir_ctx->page_index = desc->page_index;
 	dir_ctx->eof = desc->eof;
+	dir_ctx->dtsize = desc->dtsize;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
 out_free:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1c533f2c1f36..691a27936849 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -107,6 +107,7 @@  struct nfs_open_dir_context {
 	__u64 dup_cookie;
 	__u64 last_cookie;
 	pgoff_t page_index;
+	unsigned int dtsize;
 	signed char duped;
 	bool eof;
 };