diff mbox series

[ndctl] ndctl, monitor: support NVDIMM_FAMILY_HYPERV

Message ID PU1P153MB016948B28D99FE01EE8252DFBF600@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM (mailing list archive)
State New, archived
Headers show
Series [ndctl] ndctl, monitor: support NVDIMM_FAMILY_HYPERV | expand

Commit Message

Dexuan Cui Feb. 15, 2019, 6:38 a.m. UTC
Currently "ndctl monitor" fails for NVDIMM_FAMILY_HYPERV due to
"no smart support".

Actually NVDIMM_FAMILY_HYPERV doesn't use ND_CMD_SMART to get the health
info. Instead, it uses ND_CMD_CALL, so the checking here can't apply,
and NVDIMM_FAMILY_HYPERV doesn't support threshold alarms.

Let's skip the unnecessary checking for NVDIMM_FAMILY_HYPERV.

With the patch, when an error happens, we can log it with such a message:

{"timestamp":"1550209474.683237420","pid":3874,"event":
{"dimm-spares-remaining":false,"dimm-media-temperature":false,
"dimm-controller-temperature":false,"dimm-health-state":true,
"dimm-unclean-shutdown":false},"dimm":{"dev":"nmem1",
"id":"04d5-01-1701-01000000","handle":1,"phys_id":0,"health":
{"health_state":"critical","shutdown_count":8}}}

Here the meaning info is:
"health": {"health_state":"critical","shutdown_count":8}

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 ndctl/lib/libndctl.c   |  5 +++++
 ndctl/lib/libndctl.sym |  1 +
 ndctl/libndctl.h       |  1 +
 ndctl/monitor.c        | 33 ++++++++++++++++++++++++++-------
 4 files changed, 33 insertions(+), 7 deletions(-)

Comments

Johannes Thumshirn Feb. 15, 2019, 8:31 a.m. UTC | #1
Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
qi.fuli@fujitsu.com Feb. 15, 2019, 9:26 a.m. UTC | #2
> -----Original Message-----
> From: Linux-nvdimm <linux-nvdimm-bounces@lists.01.org> On Behalf Of Dexuan Cui
> Sent: Friday, February 15, 2019 3:38 PM
> To: Dave Jiang <dave.jiang@intel.com>; Vishal Verma <vishal.l.verma@intel.com>;
> Dan Williams <dan.j.williams@intel.com>; linux-nvdimm@lists.01.org
> Cc: Michael Kelley <mikelley@microsoft.com>
> Subject: [ndctl PATCH] ndctl, monitor: support NVDIMM_FAMILY_HYPERV
> 
> 
> Currently "ndctl monitor" fails for NVDIMM_FAMILY_HYPERV due to
> "no smart support".
> 
> Actually NVDIMM_FAMILY_HYPERV doesn't use ND_CMD_SMART to get the health
> info. Instead, it uses ND_CMD_CALL, so the checking here can't apply,
> and NVDIMM_FAMILY_HYPERV doesn't support threshold alarms.
> 
> Let's skip the unnecessary checking for NVDIMM_FAMILY_HYPERV.
> 
> With the patch, when an error happens, we can log it with such a message:
> 
> {"timestamp":"1550209474.683237420","pid":3874,"event":
> {"dimm-spares-remaining":false,"dimm-media-temperature":false,
> "dimm-controller-temperature":false,"dimm-health-state":true,
> "dimm-unclean-shutdown":false},"dimm":{"dev":"nmem1",
> "id":"04d5-01-1701-01000000","handle":1,"phys_id":0,"health":
> {"health_state":"critical","shutdown_count":8}}}
> 
> Here the meaning info is:
> "health": {"health_state":"critical","shutdown_count":8}
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  ndctl/lib/libndctl.c   |  5 +++++
>  ndctl/lib/libndctl.sym |  1 +
>  ndctl/libndctl.h       |  1 +
>  ndctl/monitor.c        | 33 ++++++++++++++++++++++++++-------
>  4 files changed, 33 insertions(+), 7 deletions(-)
> 
> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> index 48bdb27..1186579 100644
> --- a/ndctl/lib/libndctl.c
> +++ b/ndctl/lib/libndctl.c
> @@ -1550,6 +1550,11 @@ NDCTL_EXPORT struct ndctl_dimm
> *ndctl_dimm_get_next(struct ndctl_dimm *dimm)
>  	return list_next(&bus->dimms, dimm, list);
>  }
> 
> +NDCTL_EXPORT unsigned long ndctl_dimm_get_cmd_family(struct ndctl_dimm
> *dimm)
> +{
> +	return dimm->cmd_family;
> +}
> +
>  NDCTL_EXPORT unsigned int ndctl_dimm_get_handle(struct ndctl_dimm *dimm)
>  {
>  	return dimm->handle;
> diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
> index cb9f769..470e895 100644
> --- a/ndctl/lib/libndctl.sym
> +++ b/ndctl/lib/libndctl.sym
> @@ -38,6 +38,7 @@ global:
>  	ndctl_bus_wait_probe;
>  	ndctl_dimm_get_first;
>  	ndctl_dimm_get_next;
> +	ndctl_dimm_get_cmd_family;
>  	ndctl_dimm_get_handle;
>  	ndctl_dimm_get_phys_id;
>  	ndctl_dimm_get_vendor;
> diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
> index 0debdb6..cb5a8fc 100644
> --- a/ndctl/libndctl.h
> +++ b/ndctl/libndctl.h
> @@ -145,6 +145,7 @@ struct ndctl_dimm *ndctl_dimm_get_next(struct ndctl_dimm
> *dimm);
>          for (dimm = ndctl_dimm_get_first(bus); \
>               dimm != NULL; \
>               dimm = ndctl_dimm_get_next(dimm))
> +unsigned long ndctl_dimm_get_cmd_family(struct ndctl_dimm *dimm);
>  unsigned int ndctl_dimm_get_handle(struct ndctl_dimm *dimm);
>  unsigned short ndctl_dimm_get_phys_id(struct ndctl_dimm *dimm);
>  unsigned short ndctl_dimm_get_vendor(struct ndctl_dimm *dimm);
> diff --git a/ndctl/monitor.c b/ndctl/monitor.c
> index 43b2abe..6adc305 100644
> --- a/ndctl/monitor.c
> +++ b/ndctl/monitor.c
> @@ -265,31 +265,50 @@ static bool filter_region(struct ndctl_region *region,
>  	return true;
>  }
> 
> -static void filter_dimm(struct ndctl_dimm *dimm, struct util_filter_ctx *fctx)
> +static bool ndctl_dimm_test_and_enable_notification(struct ndctl_dimm *dimm)
>  {
> -	struct monitor_dimm *mdimm;
> -	struct monitor_filter_arg *mfa = fctx->monitor;
>  	const char *name = ndctl_dimm_get_devname(dimm);
> 
> +	/*
> +	 * Hyper-V Virtual NVDIMM doesn't use ND_CMD_SMART to get the health
> +	 * info. Instead, it uses ND_CMD_CALL, so the checking here can't
> +	 * apply, and it doesn't support threshold alarms.
> +	 */
> +	if (ndctl_dimm_get_cmd_family(dimm) == NVDIMM_FAMILY_HYPERV)

Hi,

I think it would be better to add a checking monitor.event_flags step here.
Users should be notified if they setup monitoring smart threshold events on NVDIMM_FAMILY_HYPERV.

QI

> +		return true;
> +
>  	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_SMART)) {
>  		err(&monitor, "%s: no smart support\n", name);
> -		return;
> +		return false;
>  	}
>  	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_SMART_THRESHOLD))
> {
>  		err(&monitor, "%s: no smart threshold support\n", name);
> -		return;
> +		return false;
>  	}
> 
>  	if (!ndctl_dimm_is_flag_supported(dimm, ND_SMART_ALARM_VALID)) {
>  		err(&monitor, "%s: smart alarm invalid\n", name);
> -		return;
> +		return false;
>  	}
> 
>  	if (enable_dimm_supported_threshold_alarms(dimm)) {
>  		err(&monitor, "%s: enable supported threshold alarms failed\n",
> name);
> -		return;
> +		return false;
>  	}
> 
> +	return true;
> +}
> +
> +static void filter_dimm(struct ndctl_dimm *dimm, struct util_filter_ctx *fctx)
> +{
> +	struct monitor_dimm *mdimm;
> +	struct monitor_filter_arg *mfa = fctx->monitor;
> +	const char *name = ndctl_dimm_get_devname(dimm);
> +
> +
> +	if (!ndctl_dimm_test_and_enable_notification(dimm))
> +		return;
> +
>  	mdimm = calloc(1, sizeof(struct monitor_dimm));
>  	if (!mdimm) {
>  		err(&monitor, "%s: calloc for monitor dimm failed\n", name);
> --
> 2.19.1
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
Dexuan Cui Feb. 16, 2019, 7:25 a.m. UTC | #3
> From: qi.fuli@fujitsu.com <qi.fuli@fujitsu.com>
> Sent: Friday, February 15, 2019 1:26 AM
> > ...
> > --- a/ndctl/monitor.c
> > +++ b/ndctl/monitor.c
> > @@ -265,31 +265,50 @@ static bool filter_region(struct ndctl_region
> *region,
> >  	return true;
> >  }
> >
> > -static void filter_dimm(struct ndctl_dimm *dimm, struct util_filter_ctx *fctx)
> > +static bool ndctl_dimm_test_and_enable_notification(struct ndctl_dimm
> *dimm)
> >  {
> > -	struct monitor_dimm *mdimm;
> > -	struct monitor_filter_arg *mfa = fctx->monitor;
> >  	const char *name = ndctl_dimm_get_devname(dimm);
> >
> > +	/*
> > +	 * Hyper-V Virtual NVDIMM doesn't use ND_CMD_SMART to get the
> > health
> > +	 * info. Instead, it uses ND_CMD_CALL, so the checking here can't
> > +	 * apply, and it doesn't support threshold alarms.
> > +	 */
> > +	if (ndctl_dimm_get_cmd_family(dimm) == NVDIMM_FAMILY_HYPERV)
> > +		return true;
> 
> Hi,
> 
> I think it would be better to add a checking monitor.event_flags step here.
> Users should be notified if they setup monitoring smart threshold events on
> NVDIMM_FAMILY_HYPERV.
> 
> QI

Hi Qi,
Unluckily NVDIMM_FAMILY_HYPERV doesn't support monitoring smart 
threshold events. Please see the _DSM Interface for Hyper-V Virtual NVDIMM
at https://uefi.org/RFIC_LIST (Virtual NVDIMM 0x1901). 

So there is no ops->new_smart_threshold defined for
NVDIMM_FAMILY_HYPERV.

The patch only skips the checks for NVDIMM_FAMILY_HYPERV, and the behavior
for the others remains the same.

Thanks,
-- Dexuan
qi.fuli@fujitsu.com Feb. 18, 2019, 1:52 a.m. UTC | #4
> -----Original Message-----
> From: Dexuan Cui <decui@microsoft.com>
> Sent: Saturday, February 16, 2019 4:26 PM
> To: Qi, Fuli <qi.fuli@fujitsu.com>; Dave Jiang <dave.jiang@intel.com>;
> Vishal Verma <vishal.l.verma@intel.com>; Dan Williams <dan.j.williams@intel.com>;
> linux-nvdimm@lists.01.org
> Cc: Michael Kelley <mikelley@microsoft.com>
> Subject: RE: [ndctl PATCH] ndctl, monitor: support NVDIMM_FAMILY_HYPERV
> 
> > From: qi.fuli@fujitsu.com <qi.fuli@fujitsu.com>
> > Sent: Friday, February 15, 2019 1:26 AM
> > > ...
> > > --- a/ndctl/monitor.c
> > > +++ b/ndctl/monitor.c
> > > @@ -265,31 +265,50 @@ static bool filter_region(struct ndctl_region
> > *region,
> > >  	return true;
> > >  }
> > >
> > > -static void filter_dimm(struct ndctl_dimm *dimm, struct
> > > util_filter_ctx *fctx)
> > > +static bool ndctl_dimm_test_and_enable_notification(struct
> > > +ndctl_dimm
> > *dimm)
> > >  {
> > > -	struct monitor_dimm *mdimm;
> > > -	struct monitor_filter_arg *mfa = fctx->monitor;
> > >  	const char *name = ndctl_dimm_get_devname(dimm);
> > >
> > > +	/*
> > > +	 * Hyper-V Virtual NVDIMM doesn't use ND_CMD_SMART to get the
> > > health
> > > +	 * info. Instead, it uses ND_CMD_CALL, so the checking here can't
> > > +	 * apply, and it doesn't support threshold alarms.
> > > +	 */
> > > +	if (ndctl_dimm_get_cmd_family(dimm) == NVDIMM_FAMILY_HYPERV)
> > > +		return true;
> >
> > Hi,
> >
> > I think it would be better to add a checking monitor.event_flags step here.
> > Users should be notified if they setup monitoring smart threshold
> > events on NVDIMM_FAMILY_HYPERV.
> >
> > QI
> 
> Hi Qi,
> Unluckily NVDIMM_FAMILY_HYPERV doesn't support monitoring smart threshold
> events. Please see the _DSM Interface for Hyper-V Virtual NVDIMM at
> https://uefi.org/RFIC_LIST (Virtual NVDIMM 0x1901).
> 
> So there is no ops->new_smart_threshold defined for NVDIMM_FAMILY_HYPERV.
> 
> The patch only skips the checks for NVDIMM_FAMILY_HYPERV, and the behavior for
> the others remains the same.
> 
> Thanks,
> -- Dexuan

Hi Dexuan,

I am sorry I didn't explain it clearly enough.
I want to say that users may not know that NVDIMM_FAMILY_HYPERV doesn't support monitoring smart threshold events.
If users setup monitoring smart threshold events on NVDIMM_FAMILY_HYPERV by mistake,
it would be more friendly to send them a notification.

Thanks,
  Qi
Dexuan Cui Feb. 19, 2019, 6:07 a.m. UTC | #5
> From: qi.fuli@fujitsu.com <qi.fuli@fujitsu.com>
> Sent: Sunday, February 17, 2019 5:52 PM
> ... 
> I am sorry I didn't explain it clearly enough.
> I want to say that users may not know that NVDIMM_FAMILY_HYPERV doesn't
> support monitoring smart threshold events.
> If users setup monitoring smart threshold events on
> NVDIMM_FAMILY_HYPERV by mistake,
> it would be more friendly to send them a notification.
> 
>  Qi
Got it. 
I'll add an explicit warning and don't monitor unsupported events.

Thanks,
-- Dexuan
diff mbox series

Patch

diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index 48bdb27..1186579 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -1550,6 +1550,11 @@  NDCTL_EXPORT struct ndctl_dimm *ndctl_dimm_get_next(struct ndctl_dimm *dimm)
 	return list_next(&bus->dimms, dimm, list);
 }
 
+NDCTL_EXPORT unsigned long ndctl_dimm_get_cmd_family(struct ndctl_dimm *dimm)
+{
+	return dimm->cmd_family;
+}
+
 NDCTL_EXPORT unsigned int ndctl_dimm_get_handle(struct ndctl_dimm *dimm)
 {
 	return dimm->handle;
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index cb9f769..470e895 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -38,6 +38,7 @@  global:
 	ndctl_bus_wait_probe;
 	ndctl_dimm_get_first;
 	ndctl_dimm_get_next;
+	ndctl_dimm_get_cmd_family;
 	ndctl_dimm_get_handle;
 	ndctl_dimm_get_phys_id;
 	ndctl_dimm_get_vendor;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index 0debdb6..cb5a8fc 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -145,6 +145,7 @@  struct ndctl_dimm *ndctl_dimm_get_next(struct ndctl_dimm *dimm);
         for (dimm = ndctl_dimm_get_first(bus); \
              dimm != NULL; \
              dimm = ndctl_dimm_get_next(dimm))
+unsigned long ndctl_dimm_get_cmd_family(struct ndctl_dimm *dimm);
 unsigned int ndctl_dimm_get_handle(struct ndctl_dimm *dimm);
 unsigned short ndctl_dimm_get_phys_id(struct ndctl_dimm *dimm);
 unsigned short ndctl_dimm_get_vendor(struct ndctl_dimm *dimm);
diff --git a/ndctl/monitor.c b/ndctl/monitor.c
index 43b2abe..6adc305 100644
--- a/ndctl/monitor.c
+++ b/ndctl/monitor.c
@@ -265,31 +265,50 @@  static bool filter_region(struct ndctl_region *region,
 	return true;
 }
 
-static void filter_dimm(struct ndctl_dimm *dimm, struct util_filter_ctx *fctx)
+static bool ndctl_dimm_test_and_enable_notification(struct ndctl_dimm *dimm)
 {
-	struct monitor_dimm *mdimm;
-	struct monitor_filter_arg *mfa = fctx->monitor;
 	const char *name = ndctl_dimm_get_devname(dimm);
 
+	/*
+	 * Hyper-V Virtual NVDIMM doesn't use ND_CMD_SMART to get the health
+	 * info. Instead, it uses ND_CMD_CALL, so the checking here can't
+	 * apply, and it doesn't support threshold alarms.
+	 */
+	if (ndctl_dimm_get_cmd_family(dimm) == NVDIMM_FAMILY_HYPERV)
+		return true;
+
 	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_SMART)) {
 		err(&monitor, "%s: no smart support\n", name);
-		return;
+		return false;
 	}
 	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_SMART_THRESHOLD)) {
 		err(&monitor, "%s: no smart threshold support\n", name);
-		return;
+		return false;
 	}
 
 	if (!ndctl_dimm_is_flag_supported(dimm, ND_SMART_ALARM_VALID)) {
 		err(&monitor, "%s: smart alarm invalid\n", name);
-		return;
+		return false;
 	}
 
 	if (enable_dimm_supported_threshold_alarms(dimm)) {
 		err(&monitor, "%s: enable supported threshold alarms failed\n", name);
-		return;
+		return false;
 	}
 
+	return true;
+}
+
+static void filter_dimm(struct ndctl_dimm *dimm, struct util_filter_ctx *fctx)
+{
+	struct monitor_dimm *mdimm;
+	struct monitor_filter_arg *mfa = fctx->monitor;
+	const char *name = ndctl_dimm_get_devname(dimm);
+
+
+	if (!ndctl_dimm_test_and_enable_notification(dimm))
+		return;
+
 	mdimm = calloc(1, sizeof(struct monitor_dimm));
 	if (!mdimm) {
 		err(&monitor, "%s: calloc for monitor dimm failed\n", name);