| Message ID | 20230623164015.3431990-3-jiaqiyan@google.com (mailing list archive) |
|---|---|
| State | New |
| Series | Improve hugetlbfs read on HWPOISON hugepages |
On 06/23/23 16:40, Jiaqi Yan wrote:
> Adds the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> be not set; otherwise hugepage's HWPOISON list becomes meaningless.
>
> Exports this functionality to be immediately used in the read operation
> for hugetlbfs.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> ---
>  include/linux/hugetlb.h | 19 +++++++++++++++++++
>  include/linux/mm.h      |  7 +++++++
>  mm/hugetlb.c            | 10 ++++++++++
>  mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
>  4 files changed, 60 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 21f942025fec..8b73a12b7b38 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
>  void hugetlb_unregister_node(struct node *node);
>  #endif
>
> +/*
> + * Struct raw_hwp_page represents information about "raw error page",
> + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> + */
> +struct raw_hwp_page {
> +	struct llist_node node;
> +	struct page *page;
> +};
> +
> +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> +{
> +	return (struct llist_head *)&folio->_hugetlb_hwpoison;
> +}
> +
> +/*
> + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> + */
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> +
>  #else /* CONFIG_HUGETLB_PAGE */
>  struct hstate {};
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66032f0d515c..41a283bd41a7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
>  extern void memory_failure_queue(unsigned long pfn, int flags);
>  extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  					bool *migratable_cleared);
> +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
>  void num_poisoned_pages_inc(unsigned long pfn);
>  void num_poisoned_pages_sub(unsigned long pfn, long i);
>  struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  	return 0;
>  }
>
> +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> +					struct page *subpage)
> +{
> +	return false;
> +}
> +
>  static inline void num_poisoned_pages_inc(unsigned long pfn)
>  {
>  }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ea24718db4af..6b860de87590 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
>  	return ret;
>  }
>
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> +{
> +	bool ret;
> +
> +	spin_lock_irq(&hugetlb_lock);
> +	ret = __is_raw_hwp_subpage(folio, subpage);
> +	spin_unlock_irq(&hugetlb_lock);

Can you describe what races the hugetlb_lock prevents here?
On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 06/23/23 16:40, Jiaqi Yan wrote:
> > Adds the functionality to tell if a subpage of a hugetlb folio is a
> > raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> > be not set; otherwise hugepage's HWPOISON list becomes meaningless.
> >
> > Exports this functionality to be immediately used in the read operation
> > for hugetlbfs.
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> > ---
> >  include/linux/hugetlb.h | 19 +++++++++++++++++++
> >  include/linux/mm.h      |  7 +++++++
> >  mm/hugetlb.c            | 10 ++++++++++
> >  mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
> >  4 files changed, 60 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 21f942025fec..8b73a12b7b38 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
> >  void hugetlb_unregister_node(struct node *node);
> >  #endif
> >
> > +/*
> > + * Struct raw_hwp_page represents information about "raw error page",
> > + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > + */
> > +struct raw_hwp_page {
> > +	struct llist_node node;
> > +	struct page *page;
> > +};
> > +
> > +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +{
> > +	return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +}
> > +
> > +/*
> > + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> > + */
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> > +
> >  #else /* CONFIG_HUGETLB_PAGE */
> >  struct hstate {};
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 66032f0d515c..41a283bd41a7 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
> >  extern void memory_failure_queue(unsigned long pfn, int flags);
> >  extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >  					bool *migratable_cleared);
> > +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> >  void num_poisoned_pages_inc(unsigned long pfn);
> >  void num_poisoned_pages_sub(unsigned long pfn, long i);
> >  struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> > @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >  	return 0;
> >  }
> >
> > +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> > +					struct page *subpage)
> > +{
> > +	return false;
> > +}
> > +
> >  static inline void num_poisoned_pages_inc(unsigned long pfn)
> >  {
> >  }
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ea24718db4af..6b860de87590 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> >  	return ret;
> >  }
> >
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > +{
> > +	bool ret;
> > +
> > +	spin_lock_irq(&hugetlb_lock);
> > +	ret = __is_raw_hwp_subpage(folio, subpage);
> > +	spin_unlock_irq(&hugetlb_lock);
>
> Can you describe what races the hugetlb_lock prevents here?

I think we should sync here with __get_huge_page_for_hwpoison, who
iterates and inserts an entry to raw_hwp_list. llist itself doesn't
ensure insertion is synchronized with iterating from
__is_raw_hwp_subpage.
> --
> Mike Kravetz
>
> > +	return ret;
> > +}
> > +
> >  void folio_putback_active_hugetlb(struct folio *folio)
> >  {
> >  	spin_lock_irq(&hugetlb_lock);
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index c415c3c462a3..891248e2930e 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
> >  #endif /* CONFIG_FS_DAX */
> >
> >  #ifdef CONFIG_HUGETLB_PAGE
> > -/*
> > - * Struct raw_hwp_page represents information about "raw error page",
> > - * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > - */
> > -struct raw_hwp_page {
> > -	struct llist_node node;
> > -	struct page *page;
> > -};
> >
> > -static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> >  {
> > -	return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +	struct llist_head *raw_hwp_head;
> > +	struct raw_hwp_page *p, *tmp;
> > +	bool ret = false;
> > +
> > +	if (!folio_test_hwpoison(folio))
> > +		return false;
> > +
> > +	/*
> > +	 * When RawHwpUnreliable is set, kernel lost track of which subpages
> > +	 * are HWPOISON. So return as if ALL subpages are HWPOISONed.
> > +	 */
> > +	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
> > +		return true;
> > +
> > +	raw_hwp_head = raw_hwp_list_head(folio);
> > +	llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
> > +		if (subpage == p->page) {
> > +			ret = true;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return ret;
> >  }
> >
> >  static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
> > --
> > 2.41.0.162.gfafddb0af9-goog
> >
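To make the race concrete, here is a minimal sketch of the writer side that is_raw_hwp_subpage() must stay ordered with (condensed from the memory-failure path, not the verbatim kernel source; the helper name record_raw_hwp() is made up for illustration):

```c
/*
 * Illustrative sketch: the memory-failure path, reached via
 * __get_huge_page_for_hwpoison() which already runs under hugetlb_lock,
 * appends an entry for each newly poisoned subpage.
 */
static int record_raw_hwp(struct folio *folio, struct page *page)
{
	struct raw_hwp_page *raw_hwp;

	lockdep_assert_held(&hugetlb_lock);

	/* GFP_ATOMIC because we cannot sleep under a spinlock */
	raw_hwp = kmalloc(sizeof(*raw_hwp), GFP_ATOMIC);
	if (!raw_hwp)
		return -ENOMEM;	/* real code marks the folio RawHwpUnreliable */

	raw_hwp->page = page;
	llist_add(&raw_hwp->node, raw_hwp_list_head(folio));
	return 0;
}
```

llist_add() is atomic with respect to other adders, but llist gives no ordering between an insertion and a concurrent llist_for_each_entry_safe() walk; taking hugetlb_lock in is_raw_hwp_subpage() closes exactly that window.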
On 07/06/23 11:25, Jiaqi Yan wrote:
> On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > On 06/23/23 16:40, Jiaqi Yan wrote:
> > >
> > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > +{
> > > +	bool ret;
> > > +
> > > +	spin_lock_irq(&hugetlb_lock);
> > > +	ret = __is_raw_hwp_subpage(folio, subpage);
> > > +	spin_unlock_irq(&hugetlb_lock);
> >
> > Can you describe what races the hugetlb_lock prevents here?
>
> I think we should sync here with __get_huge_page_for_hwpoison, who
> iterates and inserts an entry to raw_hwp_list. llist itself doesn't
> ensure insertion is synchronized with iterating from
> __is_raw_hwp_subpage.
>

Ok, makes sense. And, since this is only called in the file read patch
when we encounter a PageHWPoison(page), the overhead of the lock cycles
is not of concern.

You can add,

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
On Fri, Jun 23, 2023 at 04:40:13PM +0000, Jiaqi Yan wrote:
> Adds the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> be not set; otherwise hugepage's HWPOISON list becomes meaningless.
>
> Exports this functionality to be immediately used in the read operation
> for hugetlbfs.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>

Looks good to me, thank you.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
On Thu, Jul 6, 2023 at 3:06 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 07/06/23 11:25, Jiaqi Yan wrote:
> > On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > > On 06/23/23 16:40, Jiaqi Yan wrote:
> > > >
> > > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > > +{
> > > > +	bool ret;
> > > > +
> > > > +	spin_lock_irq(&hugetlb_lock);
> > > > +	ret = __is_raw_hwp_subpage(folio, subpage);
> > > > +	spin_unlock_irq(&hugetlb_lock);
> > >
> > > Can you describe what races the hugetlb_lock prevents here?
> >
> > I think we should sync here with __get_huge_page_for_hwpoison, who
> > iterates and inserts an entry to raw_hwp_list. llist itself doesn't
> > ensure insertion is synchronized with iterating from
> > __is_raw_hwp_subpage.
> >
>
> Ok, makes sense. And, since this is only called in the file read patch
> when we encounter a PageHWPoison(page), the overhead of the lock cycles
> is not of concern.

Yes, thanks for pointing this out, (which I forgot), as
is_raw_hwp_subpage() is after PageHWPoison check in
hugetlbfs_read_iter. I think both this and the reason why holding the
lock is worth mentioning in the commit msg.

>
> You can add,
>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> --
> Mike Kravetz
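The calling pattern agreed on above amounts to something like the following (a hypothetical condensation; the real caller is hugetlbfs_read_iter() in the follow-up patch of this series):

```c
/*
 * Hypothetical condensation of the read-side pattern discussed above:
 * a lockless page-flag test screens out the common case, so the
 * hugetlb_lock taken inside is_raw_hwp_subpage() is only paid once
 * poison has actually been seen in the folio.
 */
static bool read_hits_poisoned_subpage(struct folio *folio, struct page *subpage)
{
	/* Fast path: folio has no poison at all, no lock taken. */
	if (likely(!folio_test_hwpoison(folio)))
		return false;

	/* Rare path: walk the per-folio raw_hwp list under hugetlb_lock. */
	return is_raw_hwp_subpage(folio, subpage);
}
```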
```diff
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 21f942025fec..8b73a12b7b38 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
 void hugetlb_unregister_node(struct node *node);
 #endif
 
+/*
+ * Struct raw_hwp_page represents information about "raw error page",
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
+ */
+struct raw_hwp_page {
+	struct llist_node node;
+	struct page *page;
+};
+
+static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+{
+	return (struct llist_head *)&folio->_hugetlb_hwpoison;
+}
+
+/*
+ * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
+ */
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
+
 #else /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66032f0d515c..41a283bd41a7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
 extern void memory_failure_queue(unsigned long pfn, int flags);
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
+extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
 void num_poisoned_pages_inc(unsigned long pfn);
 void num_poisoned_pages_sub(unsigned long pfn, long i);
 struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
@@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 	return 0;
 }
 
+static inline bool __is_raw_hwp_subpage(struct folio *folio,
+					struct page *subpage)
+{
+	return false;
+}
+
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea24718db4af..6b860de87590 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 	return ret;
 }
 
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
+{
+	bool ret;
+
+	spin_lock_irq(&hugetlb_lock);
+	ret = __is_raw_hwp_subpage(folio, subpage);
+	spin_unlock_irq(&hugetlb_lock);
+	return ret;
+}
+
 void folio_putback_active_hugetlb(struct folio *folio)
 {
 	spin_lock_irq(&hugetlb_lock);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c415c3c462a3..891248e2930e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #endif /* CONFIG_FS_DAX */
 
 #ifdef CONFIG_HUGETLB_PAGE
-/*
- * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
- */
-struct raw_hwp_page {
-	struct llist_node node;
-	struct page *page;
-};
 
-static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
 {
-	return (struct llist_head *)&folio->_hugetlb_hwpoison;
+	struct llist_head *raw_hwp_head;
+	struct raw_hwp_page *p, *tmp;
+	bool ret = false;
+
+	if (!folio_test_hwpoison(folio))
+		return false;
+
+	/*
+	 * When RawHwpUnreliable is set, kernel lost track of which subpages
+	 * are HWPOISON. So return as if ALL subpages are HWPOISONed.
+	 */
+	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+		return true;
+
+	raw_hwp_head = raw_hwp_list_head(folio);
+	llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
+		if (subpage == p->page) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
 }
 
 static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
```
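For readers coming to the diff cold, the list that __is_raw_hwp_subpage() walks has a short lifecycle (a loose sketch condensed from mm/memory-failure.c; sketch_free_raw_hwp() is an illustrative stand-in for the real __folio_free_raw_hwp()):

```c
/*
 * Loose sketch of the raw_hwp list lifecycle (simplified; dedup checks
 * and most error handling omitted):
 *
 * 1. Poison time: memory_failure() allocates a raw_hwp_page under
 *    hugetlb_lock and llist_add()s it; if the allocation fails, the
 *    folio is marked RawHwpUnreliable, which is why
 *    __is_raw_hwp_subpage() reports every subpage as poisoned then.
 * 2. Query time: is_raw_hwp_subpage() walks the list under the lock.
 * 3. Teardown: when the folio is dissolved or freed, the list is
 *    drained in one shot:
 */
static void sketch_free_raw_hwp(struct folio *folio)
{
	struct raw_hwp_page *p, *next;
	struct llist_node *head = llist_del_all(raw_hwp_list_head(folio));

	llist_for_each_entry_safe(p, next, head, node)
		kfree(p);
}
```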
Adds the functionality to tell if a subpage of a hugetlb folio is a
raw HWPOISON page. This functionality relies on RawHwpUnreliable to
be not set; otherwise hugepage's HWPOISON list becomes meaningless.

Exports this functionality to be immediately used in the read operation
for hugetlbfs.

Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
 include/linux/hugetlb.h | 19 +++++++++++++++++++
 include/linux/mm.h      |  7 +++++++
 mm/hugetlb.c            | 10 ++++++++++
 mm/memory-failure.c     | 34 ++++++++++++++++++++++++----------
 4 files changed, 60 insertions(+), 10 deletions(-)