Message ID | 20210712173132.1205192-3-vaibhav@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | papr: Implement initial support for injecting smart errors | expand |
On 7/12/21 11:01 PM, Vaibhav Jain wrote: > Implements support for ndctl inject-smart command by providing an > implementation of 'smart_inject*' dimm-ops callbacks. Presently only > support for injecting unsafe-shutdown and fatal-health states is > available. > > The patch also introduce various PAPR PDSM structures that are used to > communicate the inject-smart errors to the papr_scm kernel > module. This is done via SMART_INJECT PDSM which sends a payload of > type 'struct nd_papr_pdsm_smart_inject'. > > The patch depends on the kernel PAPR PDSM implementation for > PDSM_SMART_INJECT posted at [1]. > > [1] : https://lore.kernel.org/nvdimm/20210712084819.1150350-1-vaibhav@linux.ibm.com > Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> > Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com> > --- > ndctl/lib/papr.c | 61 +++++++++++++++++++++++++++++++++++++++++++ > ndctl/lib/papr_pdsm.h | 17 ++++++++++++ > 2 files changed, 78 insertions(+) > > diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c > index 42ff200dc588..b797e1e5fe8b 100644 > --- a/ndctl/lib/papr.c > +++ b/ndctl/lib/papr.c > @@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd) > return health.dimm_bad_shutdown; > } > > +static int papr_smart_inject_supported(struct ndctl_dimm *dimm) > +{ > + if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) > + return -EOPNOTSUPP; > + > + if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT)) > + return -EIO; > + > + return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN; > +} > + with ndtest PAPR_SCM_FAMILY driver, should we test more inject types? if so should the supported inject types be fetched from the driver? 
> +static int papr_smart_inject_valid(struct ndctl_cmd *cmd) > +{ > + if (cmd->type != ND_CMD_CALL || > + to_pdsm(cmd)->cmd_status != 0 || > + to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT) > + return -EINVAL; > + > + return 0; > +} > + > +static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm) > +{ > + struct ndctl_cmd *cmd; > + > + cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT, > + sizeof(struct nd_papr_pdsm_smart_inject)); > + if (!cmd) > + return NULL; > + /* Set the input payload size */ > + to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE + > + sizeof(struct nd_papr_pdsm_smart_inject); > + return cmd; > +} > + > static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd) > { > struct nd_papr_pdsm_health health; > @@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd) > > return (health.extension_flags & PDSM_DIMM_DSC_VALID) ? > (health.dimm_dsc) : 0; > +} > + > +static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable) > +{ > + if (papr_smart_inject_valid(cmd) < 0) > + return -EINVAL; > + > + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL; > + to_payload(cmd)->inject.fatal_enable = enable; > > + return 0; > +} > + > +static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd, > + bool enable) > +{ > + if (papr_smart_inject_valid(cmd) < 0) > + return -EINVAL; > + > + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; > + to_payload(cmd)->inject.unsafe_shutdown_enable = enable; > + > + return 0; > } > > struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) { > .cmd_is_supported = papr_cmd_is_supported, > + .new_smart_inject = papr_new_smart_inject, > + .smart_inject_supported = papr_smart_inject_supported, > + .smart_inject_fatal = papr_cmd_smart_inject_fatal, > + .smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown, > .smart_get_flags = papr_smart_get_flags, > .get_firmware_status = papr_get_firmware_status, > 
.xlat_firmware_status = papr_xlat_firmware_status, > diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h > index f45b1e40c075..20ac20f89acd 100644 > --- a/ndctl/lib/papr_pdsm.h > +++ b/ndctl/lib/papr_pdsm.h > @@ -121,12 +121,29 @@ struct nd_papr_pdsm_health { > enum papr_pdsm { > PAPR_PDSM_MIN = 0x0, > PAPR_PDSM_HEALTH, > + PAPR_PDSM_SMART_INJECT, > PAPR_PDSM_MAX, > }; > +/* Flags for injecting specific smart errors */ > +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) > +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) > + > +struct nd_papr_pdsm_smart_inject { > + union { > + struct { > + /* One or more of PDSM_SMART_INJECT_ */ > + __u32 flags; > + __u8 fatal_enable; > + __u8 unsafe_shutdown_enable; > + }; > + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; > + }; > +}; > > /* Maximal union that can hold all possible payload types */ > union nd_pdsm_payload { > struct nd_papr_pdsm_health health; > + struct nd_papr_pdsm_smart_inject inject; > __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; > } __attribute__((packed)); > > -aneesh
Thanks Aneesh for looking into this patch. "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > On 7/12/21 11:01 PM, Vaibhav Jain wrote: >> Implements support for ndctl inject-smart command by providing an >> implementation of 'smart_inject*' dimm-ops callbacks. Presently only >> support for injecting unsafe-shutdown and fatal-health states is >> available. >> >> The patch also introduce various PAPR PDSM structures that are used to >> communicate the inject-smart errors to the papr_scm kernel >> module. This is done via SMART_INJECT PDSM which sends a payload of >> type 'struct nd_papr_pdsm_smart_inject'. >> >> The patch depends on the kernel PAPR PDSM implementation for >> PDSM_SMART_INJECT posted at [1]. >> >> [1] : https://lore.kernel.org/nvdimm/20210712084819.1150350-1-vaibhav@linux.ibm.com >> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> >> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com> >> --- >> ndctl/lib/papr.c | 61 +++++++++++++++++++++++++++++++++++++++++++ >> ndctl/lib/papr_pdsm.h | 17 ++++++++++++ >> 2 files changed, 78 insertions(+) >> >> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c >> index 42ff200dc588..b797e1e5fe8b 100644 >> --- a/ndctl/lib/papr.c >> +++ b/ndctl/lib/papr.c >> @@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd) >> return health.dimm_bad_shutdown; >> } >> >> +static int papr_smart_inject_supported(struct ndctl_dimm *dimm) >> +{ >> + if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) >> + return -EOPNOTSUPP; >> + >> + if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT)) >> + return -EIO; >> + >> + return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN; >> +} >> + > > with ndtest PAPR_SCM_FAMILY driver, should we test more inject types? Presently a common PDSM structure 'struct nd_papr_pdsm_smart_inject' is used between ndtest and papr_scm. If we want to add support for more inject types in ndtest then that structure would need to be extended. 
However, even with that, libndctl still shares common dimm-ops callbacks for papr_scm & ndtest, which only support injecting smart fatal health and dirty-shutdown at the moment. So with only ndtest supporting an inject type, for example temperature-threshold, I am not sure which libndctl code path we would be testing. > if > so should the supported inject types be fetched from the driver? > Good suggestion. Surely that can be implemented in the future once papr_scm and ndtest start supporting more smart inject types. >> +static int papr_smart_inject_valid(struct ndctl_cmd *cmd) >> +{ >> + if (cmd->type != ND_CMD_CALL || >> + to_pdsm(cmd)->cmd_status != 0 || >> + to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT) >> + return -EINVAL; >> + >> + return 0; >> +} >> + >> +static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm) >> +{ >> + struct ndctl_cmd *cmd; >> + >> + cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT, >> + sizeof(struct nd_papr_pdsm_smart_inject)); >> + if (!cmd) >> + return NULL; >> + /* Set the input payload size */ >> + to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE + >> + sizeof(struct nd_papr_pdsm_smart_inject); >> + return cmd; >> +} >> + >> static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd) >> { >> struct nd_papr_pdsm_health health; >> @@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd) >> >> return (health.extension_flags & PDSM_DIMM_DSC_VALID) ? 
>> (health.dimm_dsc) : 0; >> +} >> + >> +static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable) >> +{ >> + if (papr_smart_inject_valid(cmd) < 0) >> + return -EINVAL; >> + >> + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL; >> + to_payload(cmd)->inject.fatal_enable = enable; >> >> + return 0; >> +} >> + >> +static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd, >> + bool enable) >> +{ >> + if (papr_smart_inject_valid(cmd) < 0) >> + return -EINVAL; >> + >> + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; >> + to_payload(cmd)->inject.unsafe_shutdown_enable = enable; >> + >> + return 0; >> } >> >> struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) { >> .cmd_is_supported = papr_cmd_is_supported, >> + .new_smart_inject = papr_new_smart_inject, >> + .smart_inject_supported = papr_smart_inject_supported, >> + .smart_inject_fatal = papr_cmd_smart_inject_fatal, >> + .smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown, >> .smart_get_flags = papr_smart_get_flags, >> .get_firmware_status = papr_get_firmware_status, >> .xlat_firmware_status = papr_xlat_firmware_status, >> diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h >> index f45b1e40c075..20ac20f89acd 100644 >> --- a/ndctl/lib/papr_pdsm.h >> +++ b/ndctl/lib/papr_pdsm.h >> @@ -121,12 +121,29 @@ struct nd_papr_pdsm_health { >> enum papr_pdsm { >> PAPR_PDSM_MIN = 0x0, >> PAPR_PDSM_HEALTH, >> + PAPR_PDSM_SMART_INJECT, >> PAPR_PDSM_MAX, >> }; >> +/* Flags for injecting specific smart errors */ >> +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) >> +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) >> + >> +struct nd_papr_pdsm_smart_inject { >> + union { >> + struct { >> + /* One or more of PDSM_SMART_INJECT_ */ >> + __u32 flags; >> + __u8 fatal_enable; >> + __u8 unsafe_shutdown_enable; >> + }; >> + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; >> + }; >> +}; >> >> /* Maximal union that can hold all possible payload types 
*/ >> union nd_pdsm_payload { >> struct nd_papr_pdsm_health health; >> + struct nd_papr_pdsm_smart_inject inject; >> __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; >> } __attribute__((packed)); >> >> > > > -aneesh > >
diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c index 42ff200dc588..b797e1e5fe8b 100644 --- a/ndctl/lib/papr.c +++ b/ndctl/lib/papr.c @@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd) return health.dimm_bad_shutdown; } +static int papr_smart_inject_supported(struct ndctl_dimm *dimm) +{ + if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) + return -EOPNOTSUPP; + + if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT)) + return -EIO; + + return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN; +} + +static int papr_smart_inject_valid(struct ndctl_cmd *cmd) +{ + if (cmd->type != ND_CMD_CALL || + to_pdsm(cmd)->cmd_status != 0 || + to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT) + return -EINVAL; + + return 0; +} + +static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm) +{ + struct ndctl_cmd *cmd; + + cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT, + sizeof(struct nd_papr_pdsm_smart_inject)); + if (!cmd) + return NULL; + /* Set the input payload size */ + to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE + + sizeof(struct nd_papr_pdsm_smart_inject); + return cmd; +} + static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd) { struct nd_papr_pdsm_health health; @@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd) return (health.extension_flags & PDSM_DIMM_DSC_VALID) ? 
(health.dimm_dsc) : 0; +} + +static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable) +{ + if (papr_smart_inject_valid(cmd) < 0) + return -EINVAL; + + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + to_payload(cmd)->inject.fatal_enable = enable; + return 0; +} + +static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd, + bool enable) +{ + if (papr_smart_inject_valid(cmd) < 0) + return -EINVAL; + + to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + to_payload(cmd)->inject.unsafe_shutdown_enable = enable; + + return 0; } struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) { .cmd_is_supported = papr_cmd_is_supported, + .new_smart_inject = papr_new_smart_inject, + .smart_inject_supported = papr_smart_inject_supported, + .smart_inject_fatal = papr_cmd_smart_inject_fatal, + .smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown, .smart_get_flags = papr_smart_get_flags, .get_firmware_status = papr_get_firmware_status, .xlat_firmware_status = papr_xlat_firmware_status, diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h index f45b1e40c075..20ac20f89acd 100644 --- a/ndctl/lib/papr_pdsm.h +++ b/ndctl/lib/papr_pdsm.h @@ -121,12 +121,29 @@ struct nd_papr_pdsm_health { enum papr_pdsm { PAPR_PDSM_MIN = 0x0, PAPR_PDSM_HEALTH, + PAPR_PDSM_SMART_INJECT, PAPR_PDSM_MAX, }; +/* Flags for injecting specific smart errors */ +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) + +struct nd_papr_pdsm_smart_inject { + union { + struct { + /* One or more of PDSM_SMART_INJECT_ */ + __u32 flags; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { struct nd_papr_pdsm_health health; + struct nd_papr_pdsm_smart_inject inject; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __attribute__((packed));