Message ID | 20220323075714.2345743-1-yang.yang29@zte.com.cn (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v2] mm/vmstat: add events for ksm cow | expand |
On 23.03.22 08:57, cgel.zte@gmail.com wrote: > From: Yang Yang <yang.yang29@zte.com.cn> > > Users may use ksm by calling madvise(, , MADV_MERGEABLE) when they want > to save memory, it's a tradeoff by suffering delay on ksm cow. Users can > get to know how much memory ksm saved by reading > /sys/kernel/mm/ksm/pages_sharing, but they don't know what's the costs > of ksm cow, and this is important of some delay sensitive tasks. > > So add ksm cow events to help users evaluate whether or how to use ksm. > > Signed-off-by: Yang Yang <yang.yang29@zte.com.cn> > Reviewed-by: xu xin <xu.xin16@zte.com.cn> > Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn> > --- > v2: > - fix compile error when CONFIG_KSM is not set > --- > include/linux/vm_event_item.h | 2 ++ > mm/memory.c | 20 +++++++++++++++++--- > mm/vmstat.c | 2 ++ > 3 files changed, 21 insertions(+), 3 deletions(-) > > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index 16a0a4fd000b..6f32be04212f 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -131,6 +131,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > SWAP_RA_HIT, > #ifdef CONFIG_KSM > KSM_SWPIN_COPY, > + KSM_COW_SUCCESS, > + KSM_COW_FAIL, > #endif > #endif > #ifdef CONFIG_X86 > diff --git a/mm/memory.c b/mm/memory.c > index 4111f97c91a0..c24d5f04fab5 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3257,6 +3257,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > __releases(vmf->ptl) > { > struct vm_area_struct *vma = vmf->vma; > + vm_fault_t ret = 0; > + bool ksm = 0; > > if (userfaultfd_pte_wp(vma, *vmf->pte)) { > pte_unmap_unlock(vmf->pte, vmf->ptl); > @@ -3294,6 +3296,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > */ > if (PageAnon(vmf->page)) { > struct page *page = vmf->page; > + ksm = PageKsm(page); > > /* > * We have to verify under page lock: these early checks are > @@ -3302,7 +3305,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > * > * PageKsm() doesn't necessarily raise the page refcount. > */ > - if (PageKsm(page) || page_count(page) > 3) > + if (ksm || page_count(page) > 3) > goto copy; > if (!PageLRU(page)) > /* > @@ -3316,7 +3319,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > goto copy; > if (PageSwapCache(page)) > try_to_free_swap(page); > - if (PageKsm(page) || page_count(page) != 1) { > + if (ksm || page_count(page) != 1) { I think we really want to recheck here, after locking the page. Consequently, just do a PageKsm() check below. > unlock_page(page); > goto copy; > } > @@ -3339,7 +3342,18 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > get_page(vmf->page); > > pte_unmap_unlock(vmf->pte, vmf->ptl); > - return wp_page_copy(vmf); > + ret = wp_page_copy(vmf); > + > +#ifdef CONFIG_KSM > + if (ksm) { > + if (unlikely(ret & VM_FAULT_ERROR)) > + count_vm_event(KSM_COW_FAIL); > + else > + count_vm_event(KSM_COW_SUCCESS); > + } > +#endif Do we really care if we failed or not? I mean, the failure case will usually make your app crash either way ... due to OOM. Doing #ifdef CONFIG_KSM if (PageKsm(page)) count_vm_event(COW_KSM); #endif before the wp_page_copy(vmf) should be good enough, no? Should be good enough IMHO.
On Wed, Mar 23, 2022 at 07:43:04PM +0100, David Hildenbrand wrote: > On 23.03.22 08:57, cgel.zte@gmail.com wrote: > > From: Yang Yang <yang.yang29@zte.com.cn> > > > > Users may use ksm by calling madvise(, , MADV_MERGEABLE) when they want > > to save memory, it's a tradeoff by suffering delay on ksm cow. Users can > > get to know how much memory ksm saved by reading > > /sys/kernel/mm/ksm/pages_sharing, but they don't know what's the costs > > of ksm cow, and this is important of some delay sensitive tasks. > > > > So add ksm cow events to help users evaluate whether or how to use ksm. > > > > Signed-off-by: Yang Yang <yang.yang29@zte.com.cn> > > Reviewed-by: xu xin <xu.xin16@zte.com.cn> > > Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn> > > --- > > v2: > > - fix compile error when CONFIG_KSM is not set > > --- > > include/linux/vm_event_item.h | 2 ++ > > mm/memory.c | 20 +++++++++++++++++--- > > mm/vmstat.c | 2 ++ > > 3 files changed, 21 insertions(+), 3 deletions(-) > > > > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > > index 16a0a4fd000b..6f32be04212f 100644 > > --- a/include/linux/vm_event_item.h > > +++ b/include/linux/vm_event_item.h > > @@ -131,6 +131,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > > SWAP_RA_HIT, > > #ifdef CONFIG_KSM > > KSM_SWPIN_COPY, > > + KSM_COW_SUCCESS, > > + KSM_COW_FAIL, > > #endif > > #endif > > #ifdef CONFIG_X86 > > diff --git a/mm/memory.c b/mm/memory.c > > index 4111f97c91a0..c24d5f04fab5 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3257,6 +3257,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > > __releases(vmf->ptl) > > { > > struct vm_area_struct *vma = vmf->vma; > > + vm_fault_t ret = 0; > > + bool ksm = 0; > > > > if (userfaultfd_pte_wp(vma, *vmf->pte)) { > > pte_unmap_unlock(vmf->pte, vmf->ptl); > > @@ -3294,6 +3296,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > > */ > > if (PageAnon(vmf->page)) { > > struct page *page = vmf->page; > > + ksm = PageKsm(page); > > > > /* > > * We have to verify under page lock: these early checks are > > @@ -3302,7 +3305,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > > * > > * PageKsm() doesn't necessarily raise the page refcount. > > */ > > - if (PageKsm(page) || page_count(page) > 3) > > + if (ksm || page_count(page) > 3) > > goto copy; > > if (!PageLRU(page)) > > /* > > @@ -3316,7 +3319,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > > goto copy; > > if (PageSwapCache(page)) > > try_to_free_swap(page); > > - if (PageKsm(page) || page_count(page) != 1) { > > + if (ksm || page_count(page) != 1) { > > I think we really want to recheck here, after locking the page. > Consequently, just do a PageKsm() check below. > > > unlock_page(page); > > goto copy; > > } > > @@ -3339,7 +3342,18 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > > get_page(vmf->page); > > > > pte_unmap_unlock(vmf->pte, vmf->ptl); > > - return wp_page_copy(vmf); > > + ret = wp_page_copy(vmf); > > + > > +#ifdef CONFIG_KSM > > + if (ksm) { > > + if (unlikely(ret & VM_FAULT_ERROR)) > > + count_vm_event(KSM_COW_FAIL); > > + else > > + count_vm_event(KSM_COW_SUCCESS); > > + } > > +#endif > > Do we really care if we failed or not? I mean, the failure case will > usually make your app crash either way ... due to OOM. > I think we need failed count. Because ksm cow oom is a little different from general OOM. User may wonder I am not allocing new memory, why it cause OOM? And OOM may have big impact for some kind of tasks, so we better let user know that, do we? > > Doing > > #ifdef CONFIG_KSM > if (PageKsm(page)) > count_vm_event(COW_KSM); > #endif > > before the wp_page_copy(vmf) should be good enough, no? > > Should be good enough IMHO. > > -- > Thanks, > > David / dhildenb
On 24.03.22 02:36, CGEL wrote: > On Wed, Mar 23, 2022 at 07:43:04PM +0100, David Hildenbrand wrote: >> On 23.03.22 08:57, cgel.zte@gmail.com wrote: >>> From: Yang Yang <yang.yang29@zte.com.cn> >>> >>> Users may use ksm by calling madvise(, , MADV_MERGEABLE) when they want >>> to save memory, it's a tradeoff by suffering delay on ksm cow. Users can >>> get to know how much memory ksm saved by reading >>> /sys/kernel/mm/ksm/pages_sharing, but they don't know what's the costs >>> of ksm cow, and this is important of some delay sensitive tasks. >>> >>> So add ksm cow events to help users evaluate whether or how to use ksm. >>> >>> Signed-off-by: Yang Yang <yang.yang29@zte.com.cn> >>> Reviewed-by: xu xin <xu.xin16@zte.com.cn> >>> Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn> >>> --- >>> v2: >>> - fix compile error when CONFIG_KSM is not set >>> --- >>> include/linux/vm_event_item.h | 2 ++ >>> mm/memory.c | 20 +++++++++++++++++--- >>> mm/vmstat.c | 2 ++ >>> 3 files changed, 21 insertions(+), 3 deletions(-) >>> >>> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h >>> index 16a0a4fd000b..6f32be04212f 100644 >>> --- a/include/linux/vm_event_item.h >>> +++ b/include/linux/vm_event_item.h >>> @@ -131,6 +131,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, >>> SWAP_RA_HIT, >>> #ifdef CONFIG_KSM >>> KSM_SWPIN_COPY, >>> + KSM_COW_SUCCESS, >>> + KSM_COW_FAIL, >>> #endif >>> #endif >>> #ifdef CONFIG_X86 >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 4111f97c91a0..c24d5f04fab5 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -3257,6 +3257,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >>> __releases(vmf->ptl) >>> { >>> struct vm_area_struct *vma = vmf->vma; >>> + vm_fault_t ret = 0; >>> + bool ksm = 0; >>> >>> if (userfaultfd_pte_wp(vma, *vmf->pte)) { >>> pte_unmap_unlock(vmf->pte, vmf->ptl); >>> @@ -3294,6 +3296,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >>> */ >>> if (PageAnon(vmf->page)) { >>> struct page *page = vmf->page; >>> + ksm = PageKsm(page); >>> >>> /* >>> * We have to verify under page lock: these early checks are >>> @@ -3302,7 +3305,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >>> * >>> * PageKsm() doesn't necessarily raise the page refcount. >>> */ >>> - if (PageKsm(page) || page_count(page) > 3) >>> + if (ksm || page_count(page) > 3) >>> goto copy; >>> if (!PageLRU(page)) >>> /* >>> @@ -3316,7 +3319,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >>> goto copy; >>> if (PageSwapCache(page)) >>> try_to_free_swap(page); >>> - if (PageKsm(page) || page_count(page) != 1) { >>> + if (ksm || page_count(page) != 1) { >> >> I think we really want to recheck here, after locking the page. >> Consequently, just do a PageKsm() check below. >> >>> unlock_page(page); >>> goto copy; >>> } >>> @@ -3339,7 +3342,18 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) >>> get_page(vmf->page); >>> >>> pte_unmap_unlock(vmf->pte, vmf->ptl); >>> - return wp_page_copy(vmf); >>> + ret = wp_page_copy(vmf); >>> + >>> +#ifdef CONFIG_KSM >>> + if (ksm) { >>> + if (unlikely(ret & VM_FAULT_ERROR)) >>> + count_vm_event(KSM_COW_FAIL); >>> + else >>> + count_vm_event(KSM_COW_SUCCESS); >>> + } >>> +#endif >> >> Do we really care if we failed or not? I mean, the failure case will >> usually make your app crash either way ... due to OOM. >> > I think we need failed count. Because ksm cow oom is a little different > from general OOM. User may wonder I am not allocing new memory, why it > cause OOM? And OOM may have big impact for some kind of tasks, so we > better let user know that, do we? The log will be flooded by messages from the OOM handler, how will one counter regarding KSM help? I really don't think this is of any use.
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 16a0a4fd000b..6f32be04212f 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -131,6 +131,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, SWAP_RA_HIT, #ifdef CONFIG_KSM KSM_SWPIN_COPY, + KSM_COW_SUCCESS, + KSM_COW_FAIL, #endif #endif #ifdef CONFIG_X86 diff --git a/mm/memory.c b/mm/memory.c index 4111f97c91a0..c24d5f04fab5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3257,6 +3257,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; + vm_fault_t ret = 0; + bool ksm = 0; if (userfaultfd_pte_wp(vma, *vmf->pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -3294,6 +3296,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) */ if (PageAnon(vmf->page)) { struct page *page = vmf->page; + ksm = PageKsm(page); /* * We have to verify under page lock: these early checks are @@ -3302,7 +3305,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) * * PageKsm() doesn't necessarily raise the page refcount. */ - if (PageKsm(page) || page_count(page) > 3) + if (ksm || page_count(page) > 3) goto copy; if (!PageLRU(page)) /* @@ -3316,7 +3319,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) goto copy; if (PageSwapCache(page)) try_to_free_swap(page); - if (PageKsm(page) || page_count(page) != 1) { + if (ksm || page_count(page) != 1) { unlock_page(page); goto copy; } @@ -3339,7 +3342,18 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf); + ret = wp_page_copy(vmf); + +#ifdef CONFIG_KSM + if (ksm) { + if (unlikely(ret & VM_FAULT_ERROR)) + count_vm_event(KSM_COW_FAIL); + else + count_vm_event(KSM_COW_SUCCESS); + } +#endif + + return ret; } static void unmap_mapping_range_vma(struct vm_area_struct *vma, diff --git a/mm/vmstat.c b/mm/vmstat.c index d5cc8d739fac..a2c29a5206ec 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1390,6 +1390,8 @@ const char * const vmstat_text[] = { "swap_ra_hit", #ifdef CONFIG_KSM "ksm_swpin_copy", + "ksm_cow_success", + "ksm_cow_fail", #endif #endif #ifdef CONFIG_X86