diff mbox series

[v2,2/3] mm: memory-failure: Bump memory failure stats to pglist_data

Message ID 20230120034622.2698268-3-jiaqiyan@google.com (mailing list archive)
State New
Headers show
Series Introduce per NUMA node memory error statistics | expand

Commit Message

Jiaqi Yan Jan. 20, 2023, 3:46 a.m. UTC
Right before memory_failure finishes its handling, accumulate poisoned
page's resolution counters to pglist_data's memory_failure_stats, so as
to update the corresponding sysfs entries.

Tested:
1) Start an application to allocate memory buffer chunks
2) Convert random memory buffer addresses to physical addresses
3) Inject memory errors using EINJ at chosen physical addresses
4) Access poisoned memory buffer and recover from SIGBUS
5) Check counter values under
   /sys/devices/system/node/node*/memory_failure/*

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
 mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

Comments

HORIGUCHI NAOYA(堀口 直也) Jan. 23, 2023, 2:42 a.m. UTC | #1
On Fri, Jan 20, 2023 at 03:46:21AM +0000, Jiaqi Yan wrote:
> Right before memory_failure finishes its handling, accumulate poisoned
> page's resolution counters to pglist_data's memory_failure_stats, so as
> to update the corresponding sysfs entries.
> 
> Tested:
> 1) Start an application to allocate memory buffer chunks
> 2) Convert random memory buffer addresses to physical addresses
> 3) Inject memory errors using EINJ at chosen physical addresses
> 4) Access poisoned memory buffer and recover from SIGBUS
> 5) Check counter values under
>    /sys/devices/system/node/node*/memory_failure/*
> 
> Acked-by: David Rientjes <rientjes@google.com>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Kefeng Wang Feb. 2, 2023, 6:56 a.m. UTC | #2
On 2023/1/20 11:46, Jiaqi Yan wrote:
> Right before memory_failure finishes its handling, accumulate poisoned
> page's resolution counters to pglist_data's memory_failure_stats, so as
> to update the corresponding sysfs entries.
> 
> Tested:
> 1) Start an application to allocate memory buffer chunks
> 2) Convert random memory buffer addresses to physical addresses
> 3) Inject memory errors using EINJ at chosen physical addresses
> 4) Access poisoned memory buffer and recover from SIGBUS
> 5) Check counter values under
>     /sys/devices/system/node/node*/memory_failure/*
> 
> Acked-by: David Rientjes <rientjes@google.com>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> ---
>   mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 36 insertions(+)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index c628f1db3a4d..f4990839ea66 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
>   #undef slab
>   #undef reserved
>   
> +static void update_per_node_mf_stats(unsigned long pfn,
> +				     enum mf_result result)
> +{
> +	int nid = MAX_NUMNODES;
> +	struct memory_failure_stats *mf_stats = NULL;
> +
> +	nid = pfn_to_nid(pfn);
> +	if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> +		WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> +		return;
> +	}
> +
...
> +	default:
> +		WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result);
> +		break;
> +	}

We already define pr_fmt, the "Memory failure:" prefix should be dropped.
Jiaqi Yan Feb. 4, 2023, 11:17 p.m. UTC | #3
On Wed, Feb 1, 2023 at 10:56 PM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
>
> On 2023/1/20 11:46, Jiaqi Yan wrote:
> > Right before memory_failure finishes its handling, accumulate poisoned
> > page's resolution counters to pglist_data's memory_failure_stats, so as
> > to update the corresponding sysfs entries.
> >
> > Tested:
> > 1) Start an application to allocate memory buffer chunks
> > 2) Convert random memory buffer addresses to physical addresses
> > 3) Inject memory errors using EINJ at chosen physical addresses
> > 4) Access poisoned memory buffer and recover from SIGBUS
> > 5) Check counter values under
> >     /sys/devices/system/node/node*/memory_failure/*
> >
> > Acked-by: David Rientjes <rientjes@google.com>
> > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> > ---
> >   mm/memory-failure.c | 36 ++++++++++++++++++++++++++++++++++++
> >   1 file changed, 36 insertions(+)
> >
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index c628f1db3a4d..f4990839ea66 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1227,6 +1227,39 @@ static struct page_state error_states[] = {
> >   #undef slab
> >   #undef reserved
> >
> > +static void update_per_node_mf_stats(unsigned long pfn,
> > +                                  enum mf_result result)
> > +{
> > +     int nid = MAX_NUMNODES;
> > +     struct memory_failure_stats *mf_stats = NULL;
> > +
> > +     nid = pfn_to_nid(pfn);
> > +     if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
> > +             WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
> > +             return;
> > +     }
> > +
> ...
> > +     default:
> > +             WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result);
> > +             break;
> > +     }
>
> We already define pr_fmt, the "Memory failure:" prefix should be dropped.

"Should be dropped" because it will print duplicated prefixes? Does
WARN_ONCE also automatically include pr_fmt? I don't think that's the
case when I read __warn_printk.

This is what I saw from dmesg when I add a `WARN_ONCE(1, "Memory
failure: pfn=%#lx\n", pfn);`
at the beginning of `update_per_node_mf_stats` (above `nid=pfn_to_nid(pfn)`):

[  523.942688] ------------[ cut here ]------------
[  523.972026] Memory failure: pfn=0x309f8f3
[  523.972038] WARNING: CPU: 4 PID: 21119 at mm/memory-failure.c:1236
action_result+0xec/0x150
[  523.972044] Modules linked in: einj vfat fat i2c_mux_pca954x
i2c_mux spidev cdc_acm xhci_pci xhci_hcd sha3_generic gq(O)
[  523.972054] CPU: 4 PID: 21119 Comm: usemem Tainted: G S M       O
    6.2.0-smp-DEV #1
[  523.972059] RIP: 0010:action_result+0xec/0x150

No duplicated "Memory failure:".

But I realize I should probably add "\n" within WARN_ONCE.
diff mbox series

Patch

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c628f1db3a4d..f4990839ea66 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1227,6 +1227,39 @@  static struct page_state error_states[] = {
 #undef slab
 #undef reserved
 
+static void update_per_node_mf_stats(unsigned long pfn,
+				     enum mf_result result)
+{
+	int nid = MAX_NUMNODES;
+	struct memory_failure_stats *mf_stats = NULL;
+
+	nid = pfn_to_nid(pfn);
+	if (unlikely(nid < 0 || nid >= MAX_NUMNODES)) {
+		WARN_ONCE(1, "Memory failure: pfn=%#lx, invalid nid=%d", pfn, nid);
+		return;
+	}
+
+	mf_stats = &NODE_DATA(nid)->mf_stats;
+	switch (result) {
+	case MF_IGNORED:
+		++mf_stats->ignored;
+		break;
+	case MF_FAILED:
+		++mf_stats->failed;
+		break;
+	case MF_DELAYED:
+		++mf_stats->delayed;
+		break;
+	case MF_RECOVERED:
+		++mf_stats->recovered;
+		break;
+	default:
+		WARN_ONCE(1, "Memory failure: mf_result=%d is not properly handled", result);
+		break;
+	}
+	++mf_stats->total;
+}
+
 /*
  * "Dirty/Clean" indication is not 100% accurate due to the possibility of
  * setting PG_dirty outside page lock. See also comment above set_page_dirty().
@@ -1237,6 +1270,9 @@  static int action_result(unsigned long pfn, enum mf_action_page_type type,
 	trace_memory_failure_event(pfn, type, result);
 
 	num_poisoned_pages_inc(pfn);
+
+	update_per_node_mf_stats(pfn, result);
+
 	pr_err("%#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);