
[v2,2/2] mm: shmem: improve the tmpfs large folio read performance

Message ID 2129a21a5b9f77d3bb7ddec152c009ce7c5653c4.1729218573.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Series Improve the tmpfs large folio read performance

Commit Message

Baolin Wang Oct. 18, 2024, 3 a.m. UTC
tmpfs already supports PMD-sized large folios, but the tmpfs read operation
still copies data at PAGE_SIZE granularity, which is unreasonable. This patch
changes the read path to copy data at folio granularity, which can improve
read performance, and switches to the folio-related functions.

Moreover, if a large folio has a hwpoisoned subpage, the read will still fall
back to page-granularity copying.

Using 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, I can see
about a 20% performance improvement, and no regression with bs=4k.
Before the patch:
READ: bw=10.0GiB/s

After the patch:
READ: bw=12.0GiB/s

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 mm/shmem.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)
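
For reference, a minimal sketch of how such a benchmark can be set up. The
mount options, file layout and fio parameters below are assumptions for
illustration, not taken from the original test environment:

    # Mount tmpfs with huge pages enabled so the test file can be backed by 2M THPs
    mount -t tmpfs -o size=2G,huge=always tmpfs /mnt/tmpfs

    # Populate a 1G test file
    dd if=/dev/zero of=/mnt/tmpfs/testfile bs=2M count=512

    # Sequential buffered read at 64k block size; rerun with --bs=4k to check for regressions
    fio --name=tmpfs-read --filename=/mnt/tmpfs/testfile \
        --rw=read --bs=64k --size=1G --ioengine=psync --numjobs=1

The reported bandwidth is the "READ: bw=..." line in the fio output.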

Comments

Yang Shi Oct. 18, 2024, 6:38 p.m. UTC | #1
On Thu, Oct 17, 2024 at 8:00 PM Baolin Wang
<baolin.wang@linux.alibaba.com> wrote:
>
> The tmpfs has already supported the PMD-sized large folios, but the tmpfs
> read operation still performs copying at the PAGE SIZE granularity, which
> is unreasonable. This patch changes to copy data at the folio granularity,
> which can improve the read performance, as well as changing to use folio
> related functions.
>
> Moreoever, if a large folio has a subpage that is hwpoisoned, it will still
> fallback to page granularity copying.

s/Moreoever/Moreover

>
> Use 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, and I can
> see about 20% performance improvement, and no regression with bs=4k.
> Before the patch:
> READ: bw=10.0GiB/s
>
> After the patch:
> READ: bw=12.0GiB/s
>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>

The patch looks fine to me. Reviewed-by: Yang Shi <shy828301@gmail.com>


> ---
>  mm/shmem.c | 34 ++++++++++++++++++++++++----------
>  1 file changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 93642aa8d1aa..cbefd9801f6b 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -3107,13 +3107,13 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>         int error = 0;
>         ssize_t retval = 0;
>
> -       offset = iocb->ki_pos & ~PAGE_MASK;
> -
>         for (;;) {
>                 struct folio *folio = NULL;
>                 struct page *page = NULL;
>                 unsigned long nr, ret;
>                 loff_t end_offset, i_size = i_size_read(inode);
> +               bool fallback_page_copy = false;
> +               size_t fsize;
>
>                 if (unlikely(iocb->ki_pos >= i_size))
>                         break;
> @@ -3134,6 +3134,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                                 error = -EIO;
>                                 break;
>                         }
> +
> +                       if (folio_test_large(folio) &&
> +                           folio_test_has_hwpoisoned(folio))
> +                               fallback_page_copy = true;
>                 }
>
>                 /*
> @@ -3147,7 +3151,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                         break;
>                 }
>                 end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
> -               nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
> +               if (folio && likely(!fallback_page_copy))
> +                       fsize = folio_size(folio);
> +               else
> +                       fsize = PAGE_SIZE;
> +               offset = iocb->ki_pos & (fsize - 1);
> +               nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
>
>                 if (folio) {
>                         /*
> @@ -3155,10 +3164,15 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                          * virtual addresses, take care about potential aliasing
>                          * before reading the page on the kernel side.
>                          */
> -                       if (mapping_writably_mapped(mapping))
> -                               flush_dcache_page(page);
> +                       if (mapping_writably_mapped(mapping)) {
> +                               if (likely(!fallback_page_copy))
> +                                       flush_dcache_folio(folio);
> +                               else
> +                                       flush_dcache_page(page);
> +                       }
> +
>                         /*
> -                        * Mark the page accessed if we read the beginning.
> +                        * Mark the folio accessed if we read the beginning.
>                          */
>                         if (!offset)
>                                 folio_mark_accessed(folio);
> @@ -3166,9 +3180,11 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                          * Ok, we have the page, and it's up-to-date, so
>                          * now we can copy it to user space...
>                          */
> -                       ret = copy_page_to_iter(page, offset, nr, to);
> +                       if (likely(!fallback_page_copy))
> +                               ret = copy_folio_to_iter(folio, offset, nr, to);
> +                       else
> +                               ret = copy_page_to_iter(page, offset, nr, to);
>                         folio_put(folio);
> -
>                 } else if (user_backed_iter(to)) {
>                         /*
>                          * Copy to user tends to be so well optimized, but
> @@ -3186,8 +3202,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>                 }
>
>                 retval += ret;
> -               offset += ret;
> -               offset &= ~PAGE_MASK;
>                 iocb->ki_pos += ret;
>
>                 if (!iov_iter_count(to))
> --
> 2.39.3
>

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index 93642aa8d1aa..cbefd9801f6b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3107,13 +3107,13 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	int error = 0;
 	ssize_t retval = 0;
 
-	offset = iocb->ki_pos & ~PAGE_MASK;
-
 	for (;;) {
 		struct folio *folio = NULL;
 		struct page *page = NULL;
 		unsigned long nr, ret;
 		loff_t end_offset, i_size = i_size_read(inode);
+		bool fallback_page_copy = false;
+		size_t fsize;
 
 		if (unlikely(iocb->ki_pos >= i_size))
 			break;
@@ -3134,6 +3134,10 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 				error = -EIO;
 				break;
 			}
+
+			if (folio_test_large(folio) &&
+			    folio_test_has_hwpoisoned(folio))
+				fallback_page_copy = true;
 		}
 
 		/*
@@ -3147,7 +3151,12 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		}
 		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
-		nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
+		if (folio && likely(!fallback_page_copy))
+			fsize = folio_size(folio);
+		else
+			fsize = PAGE_SIZE;
+		offset = iocb->ki_pos & (fsize - 1);
+		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
 
 		if (folio) {
 			/*
@@ -3155,10 +3164,15 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			 * virtual addresses, take care about potential aliasing
 			 * before reading the page on the kernel side.
 			 */
-			if (mapping_writably_mapped(mapping))
-				flush_dcache_page(page);
+			if (mapping_writably_mapped(mapping)) {
+				if (likely(!fallback_page_copy))
+					flush_dcache_folio(folio);
+				else
+					flush_dcache_page(page);
+			}
+
 			/*
-			 * Mark the page accessed if we read the beginning.
+			 * Mark the folio accessed if we read the beginning.
 			 */
 			if (!offset)
 				folio_mark_accessed(folio);
@@ -3166,9 +3180,11 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			 * Ok, we have the page, and it's up-to-date, so
 			 * now we can copy it to user space...
 			 */
-			ret = copy_page_to_iter(page, offset, nr, to);
+			if (likely(!fallback_page_copy))
+				ret = copy_folio_to_iter(folio, offset, nr, to);
+			else
+				ret = copy_page_to_iter(page, offset, nr, to);
 			folio_put(folio);
-
 		} else if (user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
@@ -3186,8 +3202,6 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		}
 
 		retval += ret;
-		offset += ret;
-		offset &= ~PAGE_MASK;
 		iocb->ki_pos += ret;
 
 		if (!iov_iter_count(to))