Message ID | 20231213-zswap-dstmem-v1-5-896763369d04@bytedance.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/zswap: dstmem reuse optimizations and cleanups | expand |
On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou <zhouchengming@bytedance.com> wrote: > > Also after the common decompress part goes to __zswap_load(), we can > cleanup the zswap_reclaim_entry() a little. > > Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> > --- > mm/zswap.c | 23 +++++------------------ > 1 file changed, 5 insertions(+), 18 deletions(-) > > diff --git a/mm/zswap.c b/mm/zswap.c > index 0476e1c553c2..9c709368a0e6 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -1449,7 +1449,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > struct page *page; > struct mempolicy *mpol; > bool page_was_allocated; > - int ret; > struct writeback_control wbc = { > .sync_mode = WB_SYNC_NONE, > }; > @@ -1458,16 +1457,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > mpol = get_task_policy(current); > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > - if (!page) { > - ret = -ENOMEM; > - goto fail; > - } > + if (!page) > + return -ENOMEM; > > /* Found an existing page, we raced with load/swapin */ > if (!page_was_allocated) { > put_page(page); > - ret = -EEXIST; > - goto fail; > + return -EEXIST; > } > > /* > @@ -1481,8 +1477,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { > spin_unlock(&tree->lock); > delete_from_swap_cache(page_folio(page)); > - ret = -ENOMEM; > - goto fail; > + return -ENOMEM; > } > spin_unlock(&tree->lock); > > @@ -1503,15 +1498,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > __swap_writepage(page, &wbc); > put_page(page); > > - return ret; > - > -fail: > - /* > - * If we get here because the page is already in swapcache, a > - * load may be happening concurrently. It is safe and okay to > - * not free the entry. It is also okay to return !0. > - */ > - return ret; > + return 0; > } > > static int zswap_is_page_same_filled(void *ptr, unsigned long *value) > > -- > b4 0.10.1 LGTM. The fail label was primarily to free the temporary memory. Since we're re-using dstmem, this isn't really needed anymore. Nice cleanup. So, FWIW: Reviewed-by: Nhat Pham <nphamcs@gmail.com>
On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou <zhouchengming@bytedance.com> wrote: > > Also after the common decompress part goes to __zswap_load(), we can > cleanup the zswap_reclaim_entry() a little. I think you mean zswap_writeback_entry(), same for the commit title. > > Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> > --- > mm/zswap.c | 23 +++++------------------ > 1 file changed, 5 insertions(+), 18 deletions(-) > > diff --git a/mm/zswap.c b/mm/zswap.c > index 0476e1c553c2..9c709368a0e6 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -1449,7 +1449,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > struct page *page; > struct mempolicy *mpol; > bool page_was_allocated; > - int ret; > struct writeback_control wbc = { > .sync_mode = WB_SYNC_NONE, > }; > @@ -1458,16 +1457,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > mpol = get_task_policy(current); > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > - if (!page) { > - ret = -ENOMEM; > - goto fail; > - } > + if (!page) > + return -ENOMEM; > > /* Found an existing page, we raced with load/swapin */ > if (!page_was_allocated) { > put_page(page); > - ret = -EEXIST; > - goto fail; > + return -EEXIST; > } > > /* > @@ -1481,8 +1477,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { > spin_unlock(&tree->lock); > delete_from_swap_cache(page_folio(page)); > - ret = -ENOMEM; > - goto fail; > + return -ENOMEM; > } > spin_unlock(&tree->lock); > > @@ -1503,15 +1498,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, > __swap_writepage(page, &wbc); > put_page(page); > > - return ret; > - > -fail: > - /* > - * If we get here because the page is already in swapcache, a > - * load may be happening concurrently. It is safe and okay to > - * not free the entry. It is also okay to return !0. > - */ This comment should be moved above the failure check of __read_swap_cache_async() above, not completely removed. With that: Reviewed-by: Yosry Ahmed <yosryahmed@google.com> Feel free to squash this patch into the one creating __zswap_load() or leaving it as-is.
On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > <zhouchengming@bytedance.com> wrote: > > > > Also after the common decompress part goes to __zswap_load(), we can > > cleanup the zswap_reclaim_entry() a little. > > I think you mean zswap_writeback_entry(), same for the commit title. I updated my copy of the changelog, thanks. > > - /* > > - * If we get here because the page is already in swapcache, a > > - * load may be happening concurrently. It is safe and okay to > > - * not free the entry. It is also okay to return !0. > > - */ > > This comment should be moved above the failure check of > __read_swap_cache_async() above, not completely removed. This? --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix +++ a/mm/zswap.c @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct mpol = get_task_policy(current); page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &page_was_allocated, true); - if (!page) + if (!page) { + /* + * If we get here because the page is already in swapcache, a + * load may be happening concurrently. It is safe and okay to + * not free the entry. It is also okay to return !0. + */ return -ENOMEM; + } /* Found an existing page, we raced with load/swapin */ if (!page_was_allocated) {
On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > > <zhouchengming@bytedance.com> wrote: > > > > > > Also after the common decompress part goes to __zswap_load(), we can > > > cleanup the zswap_reclaim_entry() a little. > > > > I think you mean zswap_writeback_entry(), same for the commit title. > > I updated my copy of the changelog, thanks. > > > > - /* > > > - * If we get here because the page is already in swapcache, a > > > - * load may be happening concurrently. It is safe and okay to > > > - * not free the entry. It is also okay to return !0. > > > - */ > > > > This comment should be moved above the failure check of > > __read_swap_cache_async() above, not completely removed. > > This? Yes, thanks a lot. Although I think a new version is needed anyway to address other comments. > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix > +++ a/mm/zswap.c > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct > mpol = get_task_policy(current); > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > - if (!page) > + if (!page) { > + /* > + * If we get here because the page is already in swapcache, a > + * load may be happening concurrently. It is safe and okay to > + * not free the entry. It is also okay to return !0. > + */ > return -ENOMEM; > + } > > /* Found an existing page, we raced with load/swapin */ > if (!page_was_allocated) { >
On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote: > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > > > <zhouchengming@bytedance.com> wrote: > > > > > > > > Also after the common decompress part goes to __zswap_load(), we can > > > > cleanup the zswap_reclaim_entry() a little. > > > > > > I think you mean zswap_writeback_entry(), same for the commit title. > > > > I updated my copy of the changelog, thanks. > > > > > > - /* > > > > - * If we get here because the page is already in swapcache, a > > > > - * load may be happening concurrently. It is safe and okay to > > > > - * not free the entry. It is also okay to return !0. > > > > - */ > > > > > > This comment should be moved above the failure check of > > > __read_swap_cache_async() above, not completely removed. > > > > This? > > Yes, thanks a lot. Although I think a new version is needed anyway to > address other comments. > > > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix > > +++ a/mm/zswap.c > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct > > mpol = get_task_policy(current); > > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > > - if (!page) > > + if (!page) { > > + /* > > + * If we get here because the page is already in swapcache, a > > + * load may be happening concurrently. It is safe and okay to > > + * not free the entry. It is also okay to return !0. > > + */ > > return -ENOMEM; > > + } > > > > /* Found an existing page, we raced with load/swapin */ > > if (!page_was_allocated) { That's the wrong branch, no? !page -> -ENOMEM page && !page_was_allocated -> already in swapcache Personally, I don't really get the comment. What does it mean that it's "okay" not to free the entry? There is a put, which may or may not free the entry if somebody else is using it. Is it explaining how lifetime works for refcounted objects? I'm similarly confused by the "it's okay" to return non-zero. What is that trying to convey? Deletion seemed like the right choice here, IMO ;)
On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote: > > On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote: > > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > > > > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > > > > <zhouchengming@bytedance.com> wrote: > > > > > > > > > > Also after the common decompress part goes to __zswap_load(), we can > > > > > cleanup the zswap_reclaim_entry() a little. > > > > > > > > I think you mean zswap_writeback_entry(), same for the commit title. > > > > > > I updated my copy of the changelog, thanks. > > > > > > > > - /* > > > > > - * If we get here because the page is already in swapcache, a > > > > > - * load may be happening concurrently. It is safe and okay to > > > > > - * not free the entry. It is also okay to return !0. > > > > > - */ > > > > > > > > This comment should be moved above the failure check of > > > > __read_swap_cache_async() above, not completely removed. > > > > > > This? > > > > Yes, thanks a lot. Although I think a new version is needed anyway to > > address other comments. > > > > > > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix > > > +++ a/mm/zswap.c > > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct > > > mpol = get_task_policy(current); > > > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > > > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > > > - if (!page) > > > + if (!page) { > > > + /* > > > + * If we get here because the page is already in swapcache, a > > > + * load may be happening concurrently. It is safe and okay to > > > + * not free the entry. It is also okay to return !0. > > > + */ > > > return -ENOMEM; > > > + } > > > > > > /* Found an existing page, we raced with load/swapin */ > > > if (!page_was_allocated) { > > That's the wrong branch, no? > > !page -> -ENOMEM > > page && !page_was_allocated -> already in swapcache Ah yes, my bad. > > Personally, I don't really get the comment. What does it mean that > it's "okay" not to free the entry? There is a put, which may or may > not free the entry if somebody else is using it. Is it explaining how > lifetime works for refcounted objects? I'm similarly confused by the > "it's okay" to return non-zero. What is that trying to convey? > > Deletion seemed like the right choice here, IMO ;) It's not the clearest of comments for sure. I think it is just trying to say that it is okay not to write back the entry from zswap and to fail, because the caller will just try another page. I did not like silently deleting the comment during the refactoring. How about rewriting it to something like: /* * If we get here because the page is already in the swapcache, a * load may be happening concurrently. Skip this page, the caller * will move on to a different page. */
On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote: > On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote: > > > > On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote: > > > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > > > > > > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > > > > > <zhouchengming@bytedance.com> wrote: > > > > > > > > > > > > Also after the common decompress part goes to __zswap_load(), we can > > > > > > cleanup the zswap_reclaim_entry() a little. > > > > > > > > > > I think you mean zswap_writeback_entry(), same for the commit title. > > > > > > > > I updated my copy of the changelog, thanks. > > > > > > > > > > - /* > > > > > > - * If we get here because the page is already in swapcache, a > > > > > > - * load may be happening concurrently. It is safe and okay to > > > > > > - * not free the entry. It is also okay to return !0. > > > > > > - */ > > > > > > > > > > This comment should be moved above the failure check of > > > > > __read_swap_cache_async() above, not completely removed. > > > > > > > > This? > > > > > > Yes, thanks a lot. Although I think a new version is needed anyway to > > > address other comments. > > > > > > > > > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix > > > > +++ a/mm/zswap.c > > > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct > > > > mpol = get_task_policy(current); > > > > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > > > > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > > > > - if (!page) > > > > + if (!page) { > > > > + /* > > > > + * If we get here because the page is already in swapcache, a > > > > + * load may be happening concurrently. It is safe and okay to > > > > + * not free the entry. It is also okay to return !0. > > > > + */ > > > > return -ENOMEM; > > > > + } > > > > > > > > /* Found an existing page, we raced with load/swapin */ > > > > if (!page_was_allocated) { > > > > That's the wrong branch, no? > > > > !page -> -ENOMEM > > > > page && !page_was_allocated -> already in swapcache > > Ah yes, my bad. > > > > > Personally, I don't really get the comment. What does it mean that > > it's "okay" not to free the entry? There is a put, which may or may > > not free the entry if somebody else is using it. Is it explaining how > > lifetime works for refcounted objects? I'm similarly confused by the > > "it's okay" to return non-zero. What is that trying to convey? > > > > Deletion seemed like the right choice here, IMO ;) > > It's not the clearest of comments for sure. I think it is just trying > to say that it is okay not to write back the entry from zswap and to > fail, because the caller will just try another page. I did not like > silently deleting the comment during the refactoring. How about > rewriting it to something like: > > /* > * If we get here because the page is already in the swapcache, a > * load may be happening concurrently. Skip this page, the caller > * will move on to a different page. > */ Well there is this one already on the branch: /* Found an existing page, we raced with load/swapin */ which covers the first half. The unspoken assumption there is that writeback is an operation for an aged out page, while swapin means the age just got reset to 0. Maybe it makes sense to elaborate on that?
On Mon, Dec 18, 2023 at 6:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote: > > On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote: > > On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote: > > > > > > On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote: > > > > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > > > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: > > > > > > > > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou > > > > > > <zhouchengming@bytedance.com> wrote: > > > > > > > > > > > > > > Also after the common decompress part goes to __zswap_load(), we can > > > > > > > cleanup the zswap_reclaim_entry() a little. > > > > > > > > > > > > I think you mean zswap_writeback_entry(), same for the commit title. > > > > > > > > > > I updated my copy of the changelog, thanks. > > > > > > > > > > > > - /* > > > > > > > - * If we get here because the page is already in swapcache, a > > > > > > > - * load may be happening concurrently. It is safe and okay to > > > > > > > - * not free the entry. It is also okay to return !0. > > > > > > > - */ > > > > > > > > > > > > This comment should be moved above the failure check of > > > > > > __read_swap_cache_async() above, not completely removed. > > > > > > > > > > This? > > > > > > > > Yes, thanks a lot. Although I think a new version is needed anyway to > > > > address other comments. > > > > > > > > > > > > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix > > > > > +++ a/mm/zswap.c > > > > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct > > > > > mpol = get_task_policy(current); > > > > > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > > > > > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > > > > > - if (!page) > > > > > + if (!page) { > > > > > + /* > > > > > + * If we get here because the page is already in swapcache, a > > > > > + * load may be happening concurrently. It is safe and okay to > > > > > + * not free the entry. It is also okay to return !0. > > > > > + */ > > > > > return -ENOMEM; > > > > > + } > > > > > > > > > > /* Found an existing page, we raced with load/swapin */ > > > > > if (!page_was_allocated) { > > > > > > That's the wrong branch, no? > > > > > > !page -> -ENOMEM > > > > > > page && !page_was_allocated -> already in swapcache > > > > Ah yes, my bad. > > > > > > > > Personally, I don't really get the comment. What does it mean that > > > it's "okay" not to free the entry? There is a put, which may or may > > > not free the entry if somebody else is using it. Is it explaining how > > > lifetime works for refcounted objects? I'm similarly confused by the > > > "it's okay" to return non-zero. What is that trying to convey? > > > > > > Deletion seemed like the right choice here, IMO ;) > > > > It's not the clearest of comments for sure. I think it is just trying > > to say that it is okay not to write back the entry from zswap and to > > fail, because the caller will just try another page. I did not like > > silently deleting the comment during the refactoring. How about > > rewriting it to something like: > > > > /* > > * If we get here because the page is already in the swapcache, a > > * load may be happening concurrently. Skip this page, the caller > > * will move on to a different page. > > */ > > Well there is this one already on the branch: > > /* Found an existing page, we raced with load/swapin */ > > which covers the first half. The unspoken assumption there is that > writeback is an operation for an aged out page, while swapin means the > age just got reset to 0. Maybe it makes sense to elaborate on that? How about the following diff? This applies on top of Andrew's fix: diff --git a/mm/zswap.c b/mm/zswap.c index e8f8f47596dae..8228a0b370979 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct zswap_entry *entry, page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &page_was_allocated, true); if (!page) { - /* - * If we get here because the page is already in swapcache, a - * load may be happening concurrently. It is safe and okay to - * not free the entry. It is also okay to return !0. - */ return -ENOMEM; } - /* Found an existing page, we raced with load/swapin */ + /* + * Found an existing page, we raced with load/swapin. We generally + * writeback cold pages from zswap, and swapin means the page just + * became hot. Skip this page and let the caller find another one. + */ if (!page_was_allocated) { put_page(page); return -EEXIST;
On 2023/12/19 04:52, Yosry Ahmed wrote: > On Mon, Dec 18, 2023 at 6:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote: >> >> On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote: >>> On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote: >>>> >>>> On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote: >>>>> On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: >>>>>> >>>>>> On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote: >>>>>> >>>>>>> On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou >>>>>>> <zhouchengming@bytedance.com> wrote: >>>>>>>> >>>>>>>> Also after the common decompress part goes to __zswap_load(), we can >>>>>>>> cleanup the zswap_reclaim_entry() a little. >>>>>>> >>>>>>> I think you mean zswap_writeback_entry(), same for the commit title. >>>>>> >>>>>> I updated my copy of the changelog, thanks. >>>>>> >>>>>>>> - /* >>>>>>>> - * If we get here because the page is already in swapcache, a >>>>>>>> - * load may be happening concurrently. It is safe and okay to >>>>>>>> - * not free the entry. It is also okay to return !0. >>>>>>>> - */ >>>>>>> >>>>>>> This comment should be moved above the failure check of >>>>>>> __read_swap_cache_async() above, not completely removed. >>>>>> >>>>>> This? >>>>> >>>>> Yes, thanks a lot. Although I think a new version is needed anyway to >>>>> address other comments. >>>>> >>>>>> >>>>>> --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix >>>>>> +++ a/mm/zswap.c >>>>>> @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct >>>>>> mpol = get_task_policy(current); >>>>>> page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, >>>>>> NO_INTERLEAVE_INDEX, &page_was_allocated, true); >>>>>> - if (!page) >>>>>> + if (!page) { >>>>>> + /* >>>>>> + * If we get here because the page is already in swapcache, a >>>>>> + * load may be happening concurrently. It is safe and okay to >>>>>> + * not free the entry. It is also okay to return !0. >>>>>> + */ >>>>>> return -ENOMEM; >>>>>> + } >>>>>> >>>>>> /* Found an existing page, we raced with load/swapin */ >>>>>> if (!page_was_allocated) { >>>> >>>> That's the wrong branch, no? >>>> >>>> !page -> -ENOMEM >>>> >>>> page && !page_was_allocated -> already in swapcache >>> >>> Ah yes, my bad. >>> >>>> >>>> Personally, I don't really get the comment. What does it mean that >>>> it's "okay" not to free the entry? There is a put, which may or may >>>> not free the entry if somebody else is using it. Is it explaining how >>>> lifetime works for refcounted objects? I'm similarly confused by the >>>> "it's okay" to return non-zero. What is that trying to convey? >>>> >>>> Deletion seemed like the right choice here, IMO ;) >>> >>> It's not the clearest of comments for sure. I think it is just trying >>> to say that it is okay not to write back the entry from zswap and to >>> fail, because the caller will just try another page. I did not like >>> silently deleting the comment during the refactoring. How about >>> rewriting it to something like: >>> >>> /* >>> * If we get here because the page is already in the swapcache, a >>> * load may be happening concurrently. Skip this page, the caller >>> * will move on to a different page. >>> */ >> >> Well there is this one already on the branch: >> >> /* Found an existing page, we raced with load/swapin */ >> >> which covers the first half. The unspoken assumption there is that >> writeback is an operation for an aged out page, while swapin means the >> age just got reset to 0. Maybe it makes sense to elaborate on that? > > How about the following diff? This applies on top of Andrew's fix: > Reviewed-by: Chengming Zhou <zhouchengming@bytedance.com> The latest v3 also put the comments on the wrong branch, and this diff could be folded to fix it. v3: https://lore.kernel.org/all/20231213-zswap-dstmem-v3-5-4eac09b94ece@bytedance.com/ > diff --git a/mm/zswap.c b/mm/zswap.c > index e8f8f47596dae..8228a0b370979 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct > zswap_entry *entry, > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > if (!page) { > - /* > - * If we get here because the page is already in swapcache, a > - * load may be happening concurrently. It is safe and okay to > - * not free the entry. It is also okay to return !0. > - */ > return -ENOMEM; > } > > - /* Found an existing page, we raced with load/swapin */ > + /* > + * Found an existing page, we raced with load/swapin. We generally > + * writeback cold pages from zswap, and swapin means the page just > + * became hot. Skip this page and let the caller find another one. > + */ > if (!page_was_allocated) { > put_page(page); > return -EEXIST;
On Mon, Dec 18, 2023 at 12:52:40PM -0800, Yosry Ahmed wrote: > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct > zswap_entry *entry, > page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, > NO_INTERLEAVE_INDEX, &page_was_allocated, true); > if (!page) { > - /* > - * If we get here because the page is already in swapcache, a > - * load may be happening concurrently. It is safe and okay to > - * not free the entry. It is also okay to return !0. > - */ > return -ENOMEM; > } > > - /* Found an existing page, we raced with load/swapin */ > + /* > + * Found an existing page, we raced with load/swapin. We generally > + * writeback cold pages from zswap, and swapin means the page just > + * became hot. Skip this page and let the caller find another one. > + */ > if (!page_was_allocated) { > put_page(page); > return -EEXIST; ACK, that looks good to me! Thanks
diff --git a/mm/zswap.c b/mm/zswap.c index 0476e1c553c2..9c709368a0e6 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1449,7 +1449,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct page *page; struct mempolicy *mpol; bool page_was_allocated; - int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; @@ -1458,16 +1457,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry, mpol = get_task_policy(current); page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &page_was_allocated, true); - if (!page) { - ret = -ENOMEM; - goto fail; - } + if (!page) + return -ENOMEM; /* Found an existing page, we raced with load/swapin */ if (!page_was_allocated) { put_page(page); - ret = -EEXIST; - goto fail; + return -EEXIST; } /* @@ -1481,8 +1477,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); - ret = -ENOMEM; - goto fail; + return -ENOMEM; } spin_unlock(&tree->lock); @@ -1503,15 +1498,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, __swap_writepage(page, &wbc); put_page(page); - return ret; - -fail: - /* - * If we get here because the page is already in swapcache, a - * load may be happening concurrently. It is safe and okay to - * not free the entry. It is also okay to return !0. - */ - return ret; + return 0; } static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
Also after the common decompress part goes to __zswap_load(), we can cleanup the zswap_reclaim_entry() a little. Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> --- mm/zswap.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-)