Message ID | 20240719093338.55117-9-linyunsheng@huawei.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Replace page_frag with page_frag_cache for sk_page_frag() | expand |
On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote: > Refactor common codes from __page_frag_alloc_va_align() > to __page_frag_cache_refill(), so that the new API can > make use of them. > > CC: Alexander Duyck <alexander.duyck@gmail.com> > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > --- > include/linux/page_frag_cache.h | 2 +- > mm/page_frag_cache.c | 93 +++++++++++++++++---------------- > 2 files changed, 49 insertions(+), 46 deletions(-) > > diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h > index 12a16f8e8ad0..5aa45de7a9a5 100644 > --- a/include/linux/page_frag_cache.h > +++ b/include/linux/page_frag_cache.h > @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va) > > static inline void page_frag_cache_init(struct page_frag_cache *nc) > { > - nc->encoded_va = 0; > + memset(nc, 0, sizeof(*nc)); > } > I do not like requiring the entire structure to be reset as a part of init. If encoded_va is 0 then we have reset the page and the flags. There shouldn't be anything else we need to reset as remaining and bias will be reset when we reallocate. 
> static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) > diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c > index 7928e5d50711..d9c9cad17af7 100644 > --- a/mm/page_frag_cache.c > +++ b/mm/page_frag_cache.c > @@ -19,6 +19,28 @@ > #include <linux/page_frag_cache.h> > #include "internal.h" > > +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc) > +{ > + unsigned long encoded_va = nc->encoded_va; > + struct page *page; > + > + page = virt_to_page((void *)encoded_va); > + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) > + return NULL; > + > + if (unlikely(encoded_page_pfmemalloc(encoded_va))) { > + VM_BUG_ON(compound_order(page) != > + encoded_page_order(encoded_va)); > + free_unref_page(page, encoded_page_order(encoded_va)); > + return NULL; > + } > + > + /* OK, page count is 0, we can safely set it */ > + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); > + > + return page; > +} > + > static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > gfp_t gfp_mask) > { > @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > struct page *page = NULL; > gfp_t gfp = gfp_mask; > > + if (likely(nc->encoded_va)) { > + page = __page_frag_cache_recharge(nc); > + if (page) { > + order = encoded_page_order(nc->encoded_va); > + goto out; > + } > + } > + This code has no business here. This is refill, you just dropped recharge in here which will make a complete mess of the ordering and be confusing to say the least. The expectation was that if we are calling this function it is going to overwrite the virtual address to NULL on failure so we discard the old page if there is one present. This changes that behaviour. What you effectively did is made __page_frag_cache_refill into the recharge function. 
> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) > gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | > __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; > @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > if (unlikely(!page)) { > page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); > if (unlikely(!page)) { > - nc->encoded_va = 0; > + memset(nc, 0, sizeof(*nc)); > return NULL; > } > The memset will take a few more instructions than the existing code did. I would prefer to keep this as is if at all possible. > @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > nc->encoded_va = encode_aligned_va(page_address(page), order, > page_is_pfmemalloc(page)); > > + /* Even if we own the page, we do not use atomic_set(). > + * This would break get_page_unless_zero() users. > + */ > + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); > + > +out: > + /* reset page count bias and remaining to start of new frag */ > + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > + nc->remaining = PAGE_SIZE << order; > + > return page; > } > Why bother returning a page at all? It doesn't seem like you don't use it anymore. It looks like the use cases you have for it in patch 11/12 all appear to be broken from what I can tell as you are adding page as a variable when we don't need to be passing internal details to the callers of the function when just a simple error return code would do. 
> @@ -55,7 +95,7 @@ void page_frag_cache_drain(struct page_frag_cache *nc) > > __page_frag_cache_drain(virt_to_head_page((void *)nc->encoded_va), > nc->pagecnt_bias); > - nc->encoded_va = 0; > + memset(nc, 0, sizeof(*nc)); > } > EXPORT_SYMBOL(page_frag_cache_drain); > > @@ -72,31 +112,9 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc, > unsigned int fragsz, gfp_t gfp_mask, > unsigned int align_mask) > { > - unsigned long encoded_va = nc->encoded_va; > - unsigned int size, remaining; > - struct page *page; > - > - if (unlikely(!encoded_va)) { > -refill: > - page = __page_frag_cache_refill(nc, gfp_mask); > - if (!page) > - return NULL; > - > - encoded_va = nc->encoded_va; > - size = page_frag_cache_page_size(encoded_va); > + unsigned int size = page_frag_cache_page_size(nc->encoded_va); > + unsigned int remaining = nc->remaining & align_mask; > > - /* Even if we own the page, we do not use atomic_set(). > - * This would break get_page_unless_zero() users. > - */ > - page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); > - > - /* reset page count bias and remaining to start of new frag */ > - nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > - nc->remaining = size; > - } > - > - size = page_frag_cache_page_size(encoded_va); > - remaining = nc->remaining & align_mask; > if (unlikely(remaining < fragsz)) { I am not a fan of adding a dependency on remaining being set *before* encoded_va. The fact is it relies on the size to set it. In addition this is creating a big blob of code for the conditional paths to have to jump over. I think it is much better to first validate encoded_va, and then validate remaining. Otherwise just checking remaining seems problematic and like a recipe for NULL pointer accesses. 
> if (unlikely(fragsz > PAGE_SIZE)) { > /* > @@ -111,32 +129,17 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc, > return NULL; > } > > - page = virt_to_page((void *)encoded_va); > - > - if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) > - goto refill; > - > - if (unlikely(encoded_page_pfmemalloc(encoded_va))) { > - VM_BUG_ON(compound_order(page) != > - encoded_page_order(encoded_va)); > - free_unref_page(page, encoded_page_order(encoded_va)); > - goto refill; > - } > - > - /* OK, page count is 0, we can safely set it */ > - set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); > - > - /* reset page count bias and remaining to start of new frag */ > - nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > - nc->remaining = size; > + if (unlikely(!__page_frag_cache_refill(nc, gfp_mask))) > + return NULL; > > + size = page_frag_cache_page_size(nc->encoded_va); So this is adding yet another setting/reading of size to the recharge path now. Previously the recharge path could just reuse the existing size. > remaining = size; > } > > nc->pagecnt_bias--; > nc->remaining = remaining - fragsz; > > - return encoded_page_address(encoded_va) + (size - remaining); > + return encoded_page_address(nc->encoded_va) + (size - remaining); > } > EXPORT_SYMBOL(__page_frag_alloc_va_align); >
On 2024/7/22 7:40, Alexander H Duyck wrote: > On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote: >> Refactor common codes from __page_frag_alloc_va_align() >> to __page_frag_cache_refill(), so that the new API can >> make use of them. >> >> CC: Alexander Duyck <alexander.duyck@gmail.com> >> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> >> --- >> include/linux/page_frag_cache.h | 2 +- >> mm/page_frag_cache.c | 93 +++++++++++++++++---------------- >> 2 files changed, 49 insertions(+), 46 deletions(-) >> >> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h >> index 12a16f8e8ad0..5aa45de7a9a5 100644 >> --- a/include/linux/page_frag_cache.h >> +++ b/include/linux/page_frag_cache.h >> @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va) >> >> static inline void page_frag_cache_init(struct page_frag_cache *nc) >> { >> - nc->encoded_va = 0; >> + memset(nc, 0, sizeof(*nc)); >> } >> > > I do not like requiring the entire structure to be reset as a part of > init. If encoded_va is 0 then we have reset the page and the flags. > There shouldn't be anything else we need to reset as remaining and bias > will be reset when we reallocate. The argument is about aoviding one checking for fast path by doing the memset in the slow path, which you might already know accroding to your comment in previous version. It is just sometimes hard to understand your preference for maintainability over performance here as sometimes your comment seems to perfer performance over maintainability, like the LEA trick you mentioned and offset count-down before this patchset. It would be good to be more consistent about this, otherwise it is sometimes confusing when doing the refactoring. 
> >> static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) >> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c >> index 7928e5d50711..d9c9cad17af7 100644 >> --- a/mm/page_frag_cache.c >> +++ b/mm/page_frag_cache.c >> @@ -19,6 +19,28 @@ >> #include <linux/page_frag_cache.h> >> #include "internal.h" >> >> +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc) >> +{ >> + unsigned long encoded_va = nc->encoded_va; >> + struct page *page; >> + >> + page = virt_to_page((void *)encoded_va); >> + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) >> + return NULL; >> + >> + if (unlikely(encoded_page_pfmemalloc(encoded_va))) { >> + VM_BUG_ON(compound_order(page) != >> + encoded_page_order(encoded_va)); >> + free_unref_page(page, encoded_page_order(encoded_va)); >> + return NULL; >> + } >> + >> + /* OK, page count is 0, we can safely set it */ >> + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); >> + >> + return page; >> +} >> + >> static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >> gfp_t gfp_mask) >> { >> @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >> struct page *page = NULL; >> gfp_t gfp = gfp_mask; >> >> + if (likely(nc->encoded_va)) { >> + page = __page_frag_cache_recharge(nc); >> + if (page) { >> + order = encoded_page_order(nc->encoded_va); >> + goto out; >> + } >> + } >> + > > This code has no business here. This is refill, you just dropped > recharge in here which will make a complete mess of the ordering and be > confusing to say the least. > > The expectation was that if we are calling this function it is going to > overwrite the virtual address to NULL on failure so we discard the old > page if there is one present. This changes that behaviour. What you > effectively did is made __page_frag_cache_refill into the recharge > function. 
The idea is to reuse the below for both __page_frag_cache_refill() and __page_frag_cache_recharge(), which seems to be about maintainability to not having duplicated code. If there is a better idea to avoid that duplicated code while keeping the old behaviour, I am happy to change it. /* reset page count bias and remaining to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->remaining = PAGE_SIZE << order; > >> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) >> gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | >> __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; >> @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >> if (unlikely(!page)) { >> page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); >> if (unlikely(!page)) { >> - nc->encoded_va = 0; >> + memset(nc, 0, sizeof(*nc)); >> return NULL; >> } >> > > The memset will take a few more instructions than the existing code > did. I would prefer to keep this as is if at all possible. It will not take more instructions for arm64 as it has 'stp' instruction for __HAVE_ARCH_MEMSET is set. There is something similar for x64? > >> @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >> nc->encoded_va = encode_aligned_va(page_address(page), order, >> page_is_pfmemalloc(page)); >> >> + /* Even if we own the page, we do not use atomic_set(). >> + * This would break get_page_unless_zero() users. >> + */ >> + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); >> + >> +out: >> + /* reset page count bias and remaining to start of new frag */ >> + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; >> + nc->remaining = PAGE_SIZE << order; >> + >> return page; >> } >> > > Why bother returning a page at all? It doesn't seem like you don't use > it anymore. 
It looks like the use cases you have for it in patch 11/12 > all appear to be broken from what I can tell as you are adding page as > a variable when we don't need to be passing internal details to the > callers of the function when just a simple error return code would do. It would be good to be more specific about the 'broken' part here.
On Mon, Jul 22, 2024 at 5:55 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2024/7/22 7:40, Alexander H Duyck wrote: > > On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote: > >> Refactor common codes from __page_frag_alloc_va_align() > >> to __page_frag_cache_refill(), so that the new API can > >> make use of them. > >> > >> CC: Alexander Duyck <alexander.duyck@gmail.com> > >> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > >> --- > >> include/linux/page_frag_cache.h | 2 +- > >> mm/page_frag_cache.c | 93 +++++++++++++++++---------------- > >> 2 files changed, 49 insertions(+), 46 deletions(-) > >> > >> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h > >> index 12a16f8e8ad0..5aa45de7a9a5 100644 > >> --- a/include/linux/page_frag_cache.h > >> +++ b/include/linux/page_frag_cache.h > >> @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va) > >> > >> static inline void page_frag_cache_init(struct page_frag_cache *nc) > >> { > >> - nc->encoded_va = 0; > >> + memset(nc, 0, sizeof(*nc)); > >> } > >> > > > > I do not like requiring the entire structure to be reset as a part of > > init. If encoded_va is 0 then we have reset the page and the flags. > > There shouldn't be anything else we need to reset as remaining and bias > > will be reset when we reallocate. > > The argument is about aoviding one checking for fast path by doing the > memset in the slow path, which you might already know accroding to your > comment in previous version. > > It is just sometimes hard to understand your preference for maintainability > over performance here as sometimes your comment seems to perfer performance > over maintainability, like the LEA trick you mentioned and offset count-down > before this patchset. It would be good to be more consistent about this, > otherwise it is sometimes confusing when doing the refactoring. 
The use of a negative offset is arguably more maintainable in my mind rather than being a performance trick. Essentially if you use the negative value you can just mask off the upper bits and it is the offset in the page. As such it is actually easier for me to read versus "remaining" which is an offset from the end of the page. Assuming you read the offset in hex anyway. > > > >> static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) > >> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c > >> index 7928e5d50711..d9c9cad17af7 100644 > >> --- a/mm/page_frag_cache.c > >> +++ b/mm/page_frag_cache.c > >> @@ -19,6 +19,28 @@ > >> #include <linux/page_frag_cache.h> > >> #include "internal.h" > >> > >> +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc) > >> +{ > >> + unsigned long encoded_va = nc->encoded_va; > >> + struct page *page; > >> + > >> + page = virt_to_page((void *)encoded_va); > >> + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) > >> + return NULL; > >> + > >> + if (unlikely(encoded_page_pfmemalloc(encoded_va))) { > >> + VM_BUG_ON(compound_order(page) != > >> + encoded_page_order(encoded_va)); > >> + free_unref_page(page, encoded_page_order(encoded_va)); > >> + return NULL; > >> + } > >> + > >> + /* OK, page count is 0, we can safely set it */ > >> + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); > >> + > >> + return page; > >> +} > >> + > >> static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > >> gfp_t gfp_mask) > >> { > >> @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > >> struct page *page = NULL; > >> gfp_t gfp = gfp_mask; > >> > >> + if (likely(nc->encoded_va)) { > >> + page = __page_frag_cache_recharge(nc); > >> + if (page) { > >> + order = encoded_page_order(nc->encoded_va); > >> + goto out; > >> + } > >> + } > >> + > > > > This code has no business here. 
This is refill, you just dropped > > recharge in here which will make a complete mess of the ordering and be > > confusing to say the least. > > > > The expectation was that if we are calling this function it is going to > > overwrite the virtual address to NULL on failure so we discard the old > > page if there is one present. This changes that behaviour. What you > > effectively did is made __page_frag_cache_refill into the recharge > > function. > > The idea is to reuse the below for both __page_frag_cache_refill() and > __page_frag_cache_recharge(), which seems to be about maintainability > to not having duplicated code. If there is a better idea to avoid that > duplicated code while keeping the old behaviour, I am happy to change > it. > > /* reset page count bias and remaining to start of new frag */ > nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > nc->remaining = PAGE_SIZE << order; > The only piece that is really reused here is the pagecnt_bias assignment. What is obfuscated away is that the order is gotten through one of two paths. Really order isn't order here it is size. Which should have been fetched already. What you end up doing with this change is duplicating a bunch of code throughout the function. You end up having to fetch size multiple times multiple ways. here you are generating it with order. Then you have to turn around and get it again at the start of the function, and again after calling this function in order to pull it back out. 
> > > >> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) > >> gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | > >> __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; > >> @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > >> if (unlikely(!page)) { > >> page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); > >> if (unlikely(!page)) { > >> - nc->encoded_va = 0; > >> + memset(nc, 0, sizeof(*nc)); > >> return NULL; > >> } > >> > > > > The memset will take a few more instructions than the existing code > > did. I would prefer to keep this as is if at all possible. > > It will not take more instructions for arm64 as it has 'stp' instruction for > __HAVE_ARCH_MEMSET is set. > There is something similar for x64? The x64 does not last I knew without getting into the SSE/AVX type stuff. This becomes two seperate 8B store instructions. > > > >> @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > >> nc->encoded_va = encode_aligned_va(page_address(page), order, > >> page_is_pfmemalloc(page)); > >> > >> + /* Even if we own the page, we do not use atomic_set(). > >> + * This would break get_page_unless_zero() users. > >> + */ > >> + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); > >> + > >> +out: > >> + /* reset page count bias and remaining to start of new frag */ > >> + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > >> + nc->remaining = PAGE_SIZE << order; > >> + > >> return page; > >> } > >> > > > > Why bother returning a page at all? It doesn't seem like you don't use > > it anymore. It looks like the use cases you have for it in patch 11/12 > > all appear to be broken from what I can tell as you are adding page as > > a variable when we don't need to be passing internal details to the > > callers of the function when just a simple error return code would do. > > It would be good to be more specific about the 'broken' part here. We are passing internals to the caller. 
Basically this is generally frowned upon for many implementations of things as the general idea is that the internal page we are using should be a pseudo-private value. I understand that you have one or two callers that need it for the use cases you have in patches 11/12, but it also seems like you are just passing it regardless. For example I noticed in a few cases you added the page pointer in 12 to handle the return value, but then just used it to check for NULL. My thought would be that rather than returning the page here you would be better off just returning 0 or an error and then doing the virt_to_page translation for all the cases where the page is actually needed since you have to go that route for a cached page anyway.
On 2024/7/22 23:32, Alexander Duyck wrote: > On Mon, Jul 22, 2024 at 5:55 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: >> >> On 2024/7/22 7:40, Alexander H Duyck wrote: >>> On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote: >>>> Refactor common codes from __page_frag_alloc_va_align() >>>> to __page_frag_cache_refill(), so that the new API can >>>> make use of them. >>>> >>>> CC: Alexander Duyck <alexander.duyck@gmail.com> >>>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> >>>> --- >>>> include/linux/page_frag_cache.h | 2 +- >>>> mm/page_frag_cache.c | 93 +++++++++++++++++---------------- >>>> 2 files changed, 49 insertions(+), 46 deletions(-) >>>> >>>> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h >>>> index 12a16f8e8ad0..5aa45de7a9a5 100644 >>>> --- a/include/linux/page_frag_cache.h >>>> +++ b/include/linux/page_frag_cache.h >>>> @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va) >>>> >>>> static inline void page_frag_cache_init(struct page_frag_cache *nc) >>>> { >>>> - nc->encoded_va = 0; >>>> + memset(nc, 0, sizeof(*nc)); >>>> } >>>> >>> >>> I do not like requiring the entire structure to be reset as a part of >>> init. If encoded_va is 0 then we have reset the page and the flags. >>> There shouldn't be anything else we need to reset as remaining and bias >>> will be reset when we reallocate. >> >> The argument is about aoviding one checking for fast path by doing the >> memset in the slow path, which you might already know accroding to your >> comment in previous version. >> >> It is just sometimes hard to understand your preference for maintainability >> over performance here as sometimes your comment seems to perfer performance >> over maintainability, like the LEA trick you mentioned and offset count-down >> before this patchset. It would be good to be more consistent about this, >> otherwise it is sometimes confusing when doing the refactoring. 
> > The use of a negative offset is arguably more maintainable in my mind > rather than being a performance trick. Essentially if you use the > negative value you can just mask off the upper bits and it is the > offset in the page. As such it is actually easier for me to read > versus "remaining" which is an offset from the end of the page. > Assuming you read the offset in hex anyway. Reading the above doesn't seems maintainable to me:( > >>> >>>> static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) >>>> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c >>>> index 7928e5d50711..d9c9cad17af7 100644 >>>> --- a/mm/page_frag_cache.c >>>> +++ b/mm/page_frag_cache.c >>>> @@ -19,6 +19,28 @@ >>>> #include <linux/page_frag_cache.h> >>>> #include "internal.h" >>>> >>>> +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc) >>>> +{ >>>> + unsigned long encoded_va = nc->encoded_va; >>>> + struct page *page; >>>> + >>>> + page = virt_to_page((void *)encoded_va); >>>> + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) >>>> + return NULL; >>>> + >>>> + if (unlikely(encoded_page_pfmemalloc(encoded_va))) { >>>> + VM_BUG_ON(compound_order(page) != >>>> + encoded_page_order(encoded_va)); >>>> + free_unref_page(page, encoded_page_order(encoded_va)); >>>> + return NULL; >>>> + } >>>> + >>>> + /* OK, page count is 0, we can safely set it */ >>>> + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); >>>> + >>>> + return page; >>>> +} >>>> + >>>> static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>> gfp_t gfp_mask) >>>> { >>>> @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>> struct page *page = NULL; >>>> gfp_t gfp = gfp_mask; >>>> >>>> + if (likely(nc->encoded_va)) { >>>> + page = __page_frag_cache_recharge(nc); >>>> + if (page) { >>>> + order = encoded_page_order(nc->encoded_va); >>>> + goto out; >>>> + } >>>> + } >>>> + >>> >>> This code has no business here. 
This is refill, you just dropped >>> recharge in here which will make a complete mess of the ordering and be >>> confusing to say the least. >>> >>> The expectation was that if we are calling this function it is going to >>> overwrite the virtual address to NULL on failure so we discard the old >>> page if there is one present. This changes that behaviour. What you >>> effectively did is made __page_frag_cache_refill into the recharge >>> function. >> >> The idea is to reuse the below for both __page_frag_cache_refill() and >> __page_frag_cache_recharge(), which seems to be about maintainability >> to not having duplicated code. If there is a better idea to avoid that >> duplicated code while keeping the old behaviour, I am happy to change >> it. >> >> /* reset page count bias and remaining to start of new frag */ >> nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; >> nc->remaining = PAGE_SIZE << order; >> > > The only piece that is really reused here is the pagecnt_bias > assignment. What is obfuscated away is that the order is gotten > through one of two paths. Really order isn't order here it is size. > Which should have been fetched already. What you end up doing with > this change is duplicating a bunch of code throughout the function. > You end up having to fetch size multiple times multiple ways. here you > are generating it with order. Then you have to turn around and get it > again at the start of the function, and again after calling this > function in order to pull it back out. I am assuming you would like to preserve the old behavior as below? if(!encoded_va) { refill: __page_frag_cache_refill() } if(remaining < fragsz) { if(!__page_frag_cache_recharge()) goto refill; } As we are adding new APIs, are we expecting new APIs to also duplicate the above pattern? 
> >>> >>>> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) >>>> gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | >>>> __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; >>>> @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>> if (unlikely(!page)) { >>>> page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); >>>> if (unlikely(!page)) { >>>> - nc->encoded_va = 0; >>>> + memset(nc, 0, sizeof(*nc)); >>>> return NULL; >>>> } >>>> >>> >>> The memset will take a few more instructions than the existing code >>> did. I would prefer to keep this as is if at all possible. >> >> It will not take more instructions for arm64 as it has 'stp' instruction for >> __HAVE_ARCH_MEMSET is set. >> There is something similar for x64? > > The x64 does not last I knew without getting into the SSE/AVX type > stuff. This becomes two seperate 8B store instructions. I can check later when I get hold of a x64 server. But doesn't it make sense to have one extra 8B store instructions in slow path to avoid a checking in fast path? > >>> >>>> @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>> nc->encoded_va = encode_aligned_va(page_address(page), order, >>>> page_is_pfmemalloc(page)); >>>> >>>> + /* Even if we own the page, we do not use atomic_set(). >>>> + * This would break get_page_unless_zero() users. >>>> + */ >>>> + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); >>>> + >>>> +out: >>>> + /* reset page count bias and remaining to start of new frag */ >>>> + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; >>>> + nc->remaining = PAGE_SIZE << order; >>>> + >>>> return page; >>>> } >>>> >>> >>> Why bother returning a page at all? It doesn't seem like you don't use >>> it anymore. 
It looks like the use cases you have for it in patch 11/12 >>> all appear to be broken from what I can tell as you are adding page as >>> a variable when we don't need to be passing internal details to the >>> callers of the function when just a simple error return code would do. >> >> It would be good to be more specific about the 'broken' part here. > > We are passing internals to the caller. Basically this is generally > frowned upon for many implementations of things as the general idea is > that the internal page we are using should be a pseudo-private value. It is an implementation detail and it is about avoiding calling virt_to_page() as mentioned below, I am not sure why it is referred to as 'broken', it would be better to provide more doc about why it is a bad idea here, as using 'pseudo-private' wording doesn't seem to justify the 'broken' part here. > I understand that you have one or two callers that need it for the use > cases you have in patches 11/12, but it also seems like you are just > passing it regardless. For example I noticed in a few cases you added > the page pointer in 12 to handle the return value, but then just used > it to check for NULL. My thought would be that rather than returning > the page here you would be better off just returning 0 or an error and > then doing the virt_to_page translation for all the cases where the > page is actually needed since you have to go that route for a cached > page anyway. Yes, it is about avoiding calling virt_to_page() as much as possible.
On 2024/7/23 21:19, Yunsheng Lin wrote: ... >>>>> static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>>> gfp_t gfp_mask) >>>>> { >>>>> @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, >>>>> struct page *page = NULL; >>>>> gfp_t gfp = gfp_mask; >>>>> >>>>> + if (likely(nc->encoded_va)) { >>>>> + page = __page_frag_cache_recharge(nc); >>>>> + if (page) { >>>>> + order = encoded_page_order(nc->encoded_va); >>>>> + goto out; >>>>> + } >>>>> + } >>>>> + >>>> >>>> This code has no business here. This is refill, you just dropped >>>> recharge in here which will make a complete mess of the ordering and be >>>> confusing to say the least. >>>> >>>> The expectation was that if we are calling this function it is going to >>>> overwrite the virtual address to NULL on failure so we discard the old >>>> page if there is one present. This changes that behaviour. What you >>>> effectively did is made __page_frag_cache_refill into the recharge >>>> function. >>> >>> The idea is to reuse the below for both __page_frag_cache_refill() and >>> __page_frag_cache_recharge(), which seems to be about maintainability >>> to not having duplicated code. If there is a better idea to avoid that >>> duplicated code while keeping the old behaviour, I am happy to change >>> it. >>> >>> /* reset page count bias and remaining to start of new frag */ >>> nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; >>> nc->remaining = PAGE_SIZE << order; >>> >> >> The only piece that is really reused here is the pagecnt_bias >> assignment. What is obfuscated away is that the order is gotten >> through one of two paths. Really order isn't order here it is size. >> Which should have been fetched already. What you end up doing with >> this change is duplicating a bunch of code throughout the function. >> You end up having to fetch size multiple times multiple ways. here you >> are generating it with order. 
Then you have to turn around and get it >> again at the start of the function, and again after calling this >> function in order to pull it back out. > > I am assuming you would like to reserve old behavior as below? > > if(!encoded_va) { > refill: > __page_frag_cache_refill() > } > > > if(remaining < fragsz) { > if(!__page_frag_cache_recharge()) > goto refill; > } > > As we are adding new APIs, are we expecting new APIs also duplicate > the above pattern? > >> How about something like below? __page_frag_cache_refill() and __page_frag_cache_reuse() does what their function name suggests as much as possible, __page_frag_cache_reload() is added to avoid new APIs duplicating similar pattern as much as possible, also avoid fetching size multiple times multiple ways as much as possible. static struct page *__page_frag_cache_reuse(unsigned long encoded_va, unsigned int pagecnt_bias) { struct page *page; page = virt_to_page((void *)encoded_va); if (!page_ref_sub_and_test(page, pagecnt_bias)) return NULL; if (unlikely(encoded_page_pfmemalloc(encoded_va))) { VM_BUG_ON(compound_order(page) != encoded_page_order(encoded_va)); free_unref_page(page, encoded_page_order(encoded_va)); return NULL; } /* OK, page count is 0, we can safely set it */ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); return page; } static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp_mask) { unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER; struct page *page = NULL; gfp_t gfp = gfp_mask; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER); #endif if (unlikely(!page)) { page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); if (unlikely(!page)) { memset(nc, 0, sizeof(*nc)); return NULL; } order = 0; } nc->encoded_va = encode_aligned_va(page_address(page), order, page_is_pfmemalloc(page)); /* Even if we own 
the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); return page; } static struct page *__page_frag_cache_reload(struct page_frag_cache *nc, gfp_t gfp_mask) { struct page *page; if (likely(nc->encoded_va)) { page = __page_frag_cache_reuse(nc->encoded_va, nc->pagecnt_bias); if (page) goto out; } page = __page_frag_cache_refill(nc, gfp_mask); if (unlikely(!page)) return NULL; out: nc->remaining = page_frag_cache_page_size(nc->encoded_va); nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; return page; } void *__page_frag_alloc_va_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask) { unsigned long encoded_va = nc->encoded_va; unsigned int remaining; remaining = nc->remaining & align_mask; if (unlikely(remaining < fragsz)) { if (unlikely(fragsz > PAGE_SIZE)) { /* * The caller is trying to allocate a fragment * with fragsz > PAGE_SIZE but the cache isn't big * enough to satisfy the request, this may * happen in low memory conditions. * We don't release the cache page because * it could make memory pressure worse * so we simply return NULL here. */ return NULL; } if (unlikely(!__page_frag_cache_reload(nc, gfp_mask))) return NULL; nc->pagecnt_bias--; nc->remaining -= fragsz; return encoded_page_address(nc->encoded_va); } nc->pagecnt_bias--; nc->remaining = remaining - fragsz; return encoded_page_address(encoded_va) + (page_frag_cache_page_size(encoded_va) - remaining); }
On Tue, 2024-07-30 at 21:20 +0800, Yunsheng Lin wrote: > On 2024/7/23 21:19, Yunsheng Lin wrote: > > > ... > > > The only piece that is really reused here is the pagecnt_bias > > > assignment. What is obfuscated away is that the order is gotten > > > through one of two paths. Really order isn't order here it is size. > > > Which should have been fetched already. What you end up doing with > > > this change is duplicating a bunch of code throughout the function. > > > You end up having to fetch size multiple times multiple ways. here you > > > are generating it with order. Then you have to turn around and get it > > > again at the start of the function, and again after calling this > > > function in order to pull it back out. > > > > I am assuming you would like to reserve old behavior as below? > > > > if(!encoded_va) { > > refill: > > __page_frag_cache_refill() > > } > > > > > > if(remaining < fragsz) { > > if(!__page_frag_cache_recharge()) > > goto refill; > > } > > > > As we are adding new APIs, are we expecting new APIs also duplicate > > the above pattern? > > > > > > > How about something like below? __page_frag_cache_refill() and > __page_frag_cache_reuse() does what their function name suggests > as much as possible, __page_frag_cache_reload() is added to avoid > new APIs duplicating similar pattern as much as possible, also > avoid fetching size multiple times multiple ways as much as possible. This is better. Still though with the code getting so spread out we probably need to start adding more comments to explain things. > static struct page *__page_frag_cache_reuse(unsigned long encoded_va, > unsigned int pagecnt_bias) > { > struct page *page; > > page = virt_to_page((void *)encoded_va); > if (!page_ref_sub_and_test(page, pagecnt_bias)) > return NULL; > > if (unlikely(encoded_page_pfmemalloc(encoded_va))) { > VM_BUG_ON(compound_order(page) != > encoded_page_order(encoded_va)); This VM_BUG_ON here makes no sense. 
If we are going to have this anywhere it might make more sense in the cache_refill case below to verify we are setting the order to match when we are generating the encoded_va. > free_unref_page(page, encoded_page_order(encoded_va)); > return NULL; > } > > /* OK, page count is 0, we can safely set it */ > set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); > return page; Why are you returning page here? It isn't used by any of the callers. We are refilling the page here anyway so any caller should already have access to the page since it wasn't changed. > } > > static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, > gfp_t gfp_mask) > { > unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER; > struct page *page = NULL; > gfp_t gfp = gfp_mask; > > #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) > gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | > __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; > page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, > PAGE_FRAG_CACHE_MAX_ORDER); I suspect the compliler is probably already doing this, but we should probably not be updating gfp_mask but instead gfp since that is our local variable. > #endif > if (unlikely(!page)) { > page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); > if (unlikely(!page)) { > memset(nc, 0, sizeof(*nc)); > return NULL; > } > > order = 0; > } > > nc->encoded_va = encode_aligned_va(page_address(page), order, > page_is_pfmemalloc(page)); > > /* Even if we own the page, we do not use atomic_set(). > * This would break get_page_unless_zero() users. > */ > page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); > > return page; Again, returning page here doesn't make much sense. You are better off not exposing internals as you have essentially locked the page down for use by the frag API so you shouldn't be handing out the page directly to callers. 
> } > > static struct page *__page_frag_cache_reload(struct page_frag_cache *nc, > gfp_t gfp_mask) > { > struct page *page; > > if (likely(nc->encoded_va)) { > page = __page_frag_cache_reuse(nc->encoded_va, nc->pagecnt_bias); > if (page) > goto out; > } > > page = __page_frag_cache_refill(nc, gfp_mask); > if (unlikely(!page)) > return NULL; > > out: > nc->remaining = page_frag_cache_page_size(nc->encoded_va); > nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; > return page; Your one current caller doesn't use the page value provided here. I would recommend just not bothering with the page variable until you actually need it. > } > > void *__page_frag_alloc_va_align(struct page_frag_cache *nc, > unsigned int fragsz, gfp_t gfp_mask, > unsigned int align_mask) > { > unsigned long encoded_va = nc->encoded_va; > unsigned int remaining; > > remaining = nc->remaining & align_mask; > if (unlikely(remaining < fragsz)) { You might as well swap the code paths. It would likely be much easier to read the case where you are handling remaining >= fragsz in here rather than having more if statements buried within the if statement. With that you will have more room for the comment and such below. > if (unlikely(fragsz > PAGE_SIZE)) { > /* > * The caller is trying to allocate a fragment > * with fragsz > PAGE_SIZE but the cache isn't big > * enough to satisfy the request, this may > * happen in low memory conditions. > * We don't release the cache page because > * it could make memory pressure worse > * so we simply return NULL here. > */ > return NULL; > } > > if (unlikely(!__page_frag_cache_reload(nc, gfp_mask))) > return NULL; This is what I am talking about in the earlier comments. You go to the trouble of returning page through all the callers just to not use it here. So there isn't any point in passing it through the functions. 
> > nc->pagecnt_bias--; > nc->remaining -= fragsz; > > return encoded_page_address(nc->encoded_va); > } > > nc->pagecnt_bias--; > nc->remaining = remaining - fragsz; > > return encoded_page_address(encoded_va) + > (page_frag_cache_page_size(encoded_va) - remaining); Parentheses here shouldn't be needed; addition and subtraction operations can happen in any order with the result coming out the same.
On 2024/7/30 23:12, Alexander H Duyck wrote: ... >> } >> >> nc->pagecnt_bias--; >> nc->remaining = remaining - fragsz; >> >> return encoded_page_address(encoded_va) + >> (page_frag_cache_page_size(encoded_va) - remaining); > > Parenthesis here shouldn't be needed, addition and subtractions > operations can happen in any order with the result coming out the same. I am playing it safe to avoid overflow here, as I am not sure if the allocator will give us the last page. For example, '0xfffffffffffff000 + 0x1000' will have an overflow.
On Wed, 2024-07-31 at 20:35 +0800, Yunsheng Lin wrote: > On 2024/7/30 23:12, Alexander H Duyck wrote: > > ... > > > > } > > > > > > nc->pagecnt_bias--; > > > nc->remaining = remaining - fragsz; > > > > > > return encoded_page_address(encoded_va) + > > > (page_frag_cache_page_size(encoded_va) - remaining); > > > > Parenthesis here shouldn't be needed, addition and subtractions > > operations can happen in any order with the result coming out the same. > > I am playing safe to avoid overflow here, as I am not sure if the allocator > will give us the last page. For example, '0xfffffffffffff000 + 0x1000' will > have a overflow. So what if it does though? When you subtract remaining it will underflow and go back to the correct value shouldn't it?
On 2024/8/1 1:02, Alexander H Duyck wrote: > On Wed, 2024-07-31 at 20:35 +0800, Yunsheng Lin wrote: >> On 2024/7/30 23:12, Alexander H Duyck wrote: >> >> ... >> >>>> } >>>> >>>> nc->pagecnt_bias--; >>>> nc->remaining = remaining - fragsz; >>>> >>>> return encoded_page_address(encoded_va) + >>>> (page_frag_cache_page_size(encoded_va) - remaining); >>> >>> Parenthesis here shouldn't be needed, addition and subtractions >>> operations can happen in any order with the result coming out the same. >> >> I am playing safe to avoid overflow here, as I am not sure if the allocator >> will give us the last page. For example, '0xfffffffffffff000 + 0x1000' will >> have a overflow. > > So what if it does though? When you subtract remaining it will > underflow and go back to the correct value shouldn't it? I guess it is true that the underflow will bring back the correct value. But I am not sure what it hurts to have parentheses here; don't the parentheses make it more obvious that 'size - remaining' indicates the offset of the allocated fragment, without having to scratch my head and wonder if there is an overflow/underflow problem? Or is there some performance trick behind the above comment?
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h index 12a16f8e8ad0..5aa45de7a9a5 100644 --- a/include/linux/page_frag_cache.h +++ b/include/linux/page_frag_cache.h @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va) static inline void page_frag_cache_init(struct page_frag_cache *nc) { - nc->encoded_va = 0; + memset(nc, 0, sizeof(*nc)); } static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c index 7928e5d50711..d9c9cad17af7 100644 --- a/mm/page_frag_cache.c +++ b/mm/page_frag_cache.c @@ -19,6 +19,28 @@ #include <linux/page_frag_cache.h> #include "internal.h" +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc) +{ + unsigned long encoded_va = nc->encoded_va; + struct page *page; + + page = virt_to_page((void *)encoded_va); + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) + return NULL; + + if (unlikely(encoded_page_pfmemalloc(encoded_va))) { + VM_BUG_ON(compound_order(page) != + encoded_page_order(encoded_va)); + free_unref_page(page, encoded_page_order(encoded_va)); + return NULL; + } + + /* OK, page count is 0, we can safely set it */ + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); + + return page; +} + static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp_mask) { @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, struct page *page = NULL; gfp_t gfp = gfp_mask; + if (likely(nc->encoded_va)) { + page = __page_frag_cache_recharge(nc); + if (page) { + order = encoded_page_order(nc->encoded_va); + goto out; + } + } + #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, if (unlikely(!page)) { page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); 
if (unlikely(!page)) { - nc->encoded_va = 0; + memset(nc, 0, sizeof(*nc)); return NULL; } @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, nc->encoded_va = encode_aligned_va(page_address(page), order, page_is_pfmemalloc(page)); + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); + +out: + /* reset page count bias and remaining to start of new frag */ + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; + nc->remaining = PAGE_SIZE << order; + return page; } @@ -55,7 +95,7 @@ void page_frag_cache_drain(struct page_frag_cache *nc) __page_frag_cache_drain(virt_to_head_page((void *)nc->encoded_va), nc->pagecnt_bias); - nc->encoded_va = 0; + memset(nc, 0, sizeof(*nc)); } EXPORT_SYMBOL(page_frag_cache_drain); @@ -72,31 +112,9 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask) { - unsigned long encoded_va = nc->encoded_va; - unsigned int size, remaining; - struct page *page; - - if (unlikely(!encoded_va)) { -refill: - page = __page_frag_cache_refill(nc, gfp_mask); - if (!page) - return NULL; - - encoded_va = nc->encoded_va; - size = page_frag_cache_page_size(encoded_va); + unsigned int size = page_frag_cache_page_size(nc->encoded_va); + unsigned int remaining = nc->remaining & align_mask; - /* Even if we own the page, we do not use atomic_set(). - * This would break get_page_unless_zero() users. 
- */ - page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); - - /* reset page count bias and remaining to start of new frag */ - nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; - nc->remaining = size; - } - - size = page_frag_cache_page_size(encoded_va); - remaining = nc->remaining & align_mask; if (unlikely(remaining < fragsz)) { if (unlikely(fragsz > PAGE_SIZE)) { /* @@ -111,32 +129,17 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc, return NULL; } - page = virt_to_page((void *)encoded_va); - - if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) - goto refill; - - if (unlikely(encoded_page_pfmemalloc(encoded_va))) { - VM_BUG_ON(compound_order(page) != - encoded_page_order(encoded_va)); - free_unref_page(page, encoded_page_order(encoded_va)); - goto refill; - } - - /* OK, page count is 0, we can safely set it */ - set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); - - /* reset page count bias and remaining to start of new frag */ - nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; - nc->remaining = size; + if (unlikely(!__page_frag_cache_refill(nc, gfp_mask))) + return NULL; + size = page_frag_cache_page_size(nc->encoded_va); remaining = size; } nc->pagecnt_bias--; nc->remaining = remaining - fragsz; - return encoded_page_address(encoded_va) + (size - remaining); + return encoded_page_address(nc->encoded_va) + (size - remaining); } EXPORT_SYMBOL(__page_frag_alloc_va_align);
Refactor common codes from __page_frag_alloc_va_align() to __page_frag_cache_refill(), so that the new API can make use of them. CC: Alexander Duyck <alexander.duyck@gmail.com> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> --- include/linux/page_frag_cache.h | 2 +- mm/page_frag_cache.c | 93 +++++++++++++++++---------------- 2 files changed, 49 insertions(+), 46 deletions(-)