Message ID | 20210112074924.217862-2-pizhenwei@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | add smart_critical_warning property for nvme | expand |
On Jan 12 15:49, zhenwei pi wrote: > There is a very low probability that hitting physical NVMe disk > hardware critical warning case, it's hard to write & test a monitor > agent service. > > For debugging purposes, add a new 'smart_critical_warning' property > to emulate this situation. > > The orignal version of this change is implemented by adding a fixed > property which could be initialized by QEMU command line. Suggested > by Philippe & Klaus, rework like current version. > > Test with this patch: > 1, change smart_critical_warning property for a running VM: > #virsh qemu-monitor-command nvme-upstream '{ "execute": "qom-set", > "arguments": { "path": "/machine/peripheral-anon/device[0]", > "property": "smart_critical_warning", "value":16 } }' > 2, run smartctl in guest > #smartctl -H -l error /dev/nvme0n1 > > === START OF SMART DATA SECTION === > SMART overall-health self-assessment test result: FAILED! > - volatile memory backup device has failed > > Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> I think we also need to check the asynchronous event configuration and issue an AEN if required like we do when the temperature threshold changes. Philippe, what are the locking semantics here? This runs under the big lock? > --- > hw/block/nvme.c | 28 ++++++++++++++++++++++++++++ > hw/block/nvme.h | 1 + > 2 files changed, 29 insertions(+) > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c > index 27d2c72716..a98757b6a1 100644 > --- a/hw/block/nvme.c > +++ b/hw/block/nvme.c > @@ -1214,6 +1214,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, > } > > trans_len = MIN(sizeof(smart) - off, buf_len); > + smart.critical_warning = n->smart_critical_warning; > > smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, > 1000)); > @@ -2827,6 +2828,29 @@ static Property nvme_props[] = { > DEFINE_PROP_END_OF_LIST(), > }; > > + > +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + NvmeCtrl *s = NVME(obj); > + uint8_t value = s->smart_critical_warning; > + > + visit_type_uint8(v, name, &value, errp); > +} > + > +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + NvmeCtrl *s = NVME(obj); > + uint8_t value; > + > + if (!visit_type_uint8(v, name, &value, errp)) { > + return; > + } > + > + s->smart_critical_warning = value; > +} > + > static const VMStateDescription nvme_vmstate = { > .name = "nvme", > .unmigratable = 1, > @@ -2857,6 +2881,10 @@ static void nvme_instance_init(Object *obj) > "bootindex", "/namespace@1,0", > DEVICE(obj)); > } > + > + object_property_add(obj, "smart_critical_warning", "uint8", > + nvme_get_smart_warning, > + nvme_set_smart_warning, NULL, NULL); > } > > static const TypeInfo nvme_info = { > diff --git a/hw/block/nvme.h b/hw/block/nvme.h > index e080a2318a..64e3497244 100644 > --- a/hw/block/nvme.h > +++ b/hw/block/nvme.h > @@ -139,6 +139,7 @@ typedef struct NvmeCtrl { > uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ > uint64_t starttime_ms; > uint16_t temperature; > + uint8_t smart_critical_warning; > > HostMemoryBackend *pmrdev; > > -- > 2.25.1 > >
diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 27d2c72716..a98757b6a1 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1214,6 +1214,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, } trans_len = MIN(sizeof(smart) - off, buf_len); + smart.critical_warning = n->smart_critical_warning; smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, 1000)); @@ -2827,6 +2828,29 @@ static Property nvme_props[] = { DEFINE_PROP_END_OF_LIST(), }; + +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *s = NVME(obj); + uint8_t value = s->smart_critical_warning; + + visit_type_uint8(v, name, &value, errp); +} + +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *s = NVME(obj); + uint8_t value; + + if (!visit_type_uint8(v, name, &value, errp)) { + return; + } + + s->smart_critical_warning = value; +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -2857,6 +2881,10 @@ static void nvme_instance_init(Object *obj) "bootindex", "/namespace@1,0", DEVICE(obj)); } + + object_property_add(obj, "smart_critical_warning", "uint8", + nvme_get_smart_warning, + nvme_set_smart_warning, NULL, NULL); } static const TypeInfo nvme_info = { diff --git a/hw/block/nvme.h b/hw/block/nvme.h index e080a2318a..64e3497244 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -139,6 +139,7 @@ typedef struct NvmeCtrl { uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ uint64_t starttime_ms; uint16_t temperature; + uint8_t smart_critical_warning; HostMemoryBackend *pmrdev;
There is a very low probability that hitting physical NVMe disk hardware critical warning case, it's hard to write & test a monitor agent service. For debugging purposes, add a new 'smart_critical_warning' property to emulate this situation. The orignal version of this change is implemented by adding a fixed property which could be initialized by QEMU command line. Suggested by Philippe & Klaus, rework like current version. Test with this patch: 1, change smart_critical_warning property for a running VM: #virsh qemu-monitor-command nvme-upstream '{ "execute": "qom-set", "arguments": { "path": "/machine/peripheral-anon/device[0]", "property": "smart_critical_warning", "value":16 } }' 2, run smartctl in guest #smartctl -H -l error /dev/nvme0n1 === START OF SMART DATA SECTION === SMART overall-health self-assessment test result: FAILED! - volatile memory backup device has failed Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> --- hw/block/nvme.c | 28 ++++++++++++++++++++++++++++ hw/block/nvme.h | 1 + 2 files changed, 29 insertions(+)