Message ID | 20221114135043.2958100-4-j.granados@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add OCP extended log to nvme QEMU | expand |
On Nov 14 14:50, Joel Granados wrote: > In order to evaluate write amplification factor (WAF) within the storage > stack it is important to know the number of bytes written to the > controller. The existing SMART log value of Data Units Written is too > coarse (given in units of 500 Kb) and so we add the SMART health > information extended from the OCP specification (given in units of bytes). > > To accomodate different vendor specific specifications like OCP, we add a > multiplexing function (nvme_vendor_specific_log) which will route to the > different log functions based on arguments and log ids. We only return the > OCP extended smart log when the command is 0xC0 and ocp has been turned on > in the args. > > Though we add the whole nvme smart log extended structure, we only populate > the physical_media_units_{read,written}, log_page_version and > log_page_uuid. > > Signed-off-by: Joel Granados <j.granados@samsung.com> > > squash with main > > Signed-off-by: Joel Granados <j.granados@samsung.com> Looks like you slightly messed up the squash ;) Also, squash the previous patch (adding the ocp parameter) into this. Please add a note in the documentation (docs/system/devices/nvme.rst) about this parameter. > --- > hw/nvme/ctrl.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ > include/block/nvme.h | 36 ++++++++++++++++++++++++++++ > 2 files changed, 92 insertions(+) > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > index 220683201a..5e6a8150a2 100644 > --- a/hw/nvme/ctrl.c > +++ b/hw/nvme/ctrl.c > @@ -4455,6 +4455,42 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) > stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; > } > > +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, > + uint32_t buf_len, uint64_t off, > + NvmeRequest *req) > +{ > + NvmeNamespace *ns = NULL; > + NvmeSmartLogExtended smart_ext = { 0 }; > + struct nvme_stats stats = { 0 }; > + uint32_t trans_len; > + > + if (off >= sizeof(smart_ext)) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } > + > + // Accumulate all stats from all namespaces Use /* lower-case and no period */ for one sentence, one line comments. I think scripts/checkpatch.pl picks this up. > + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { > + ns = nvme_ns(n, i); > + if (ns) > + { Paranthesis go on the same line as the `if`. > + nvme_set_blk_stats(ns, &stats); > + } > + } > + > + smart_ext.physical_media_units_written[0] = cpu_to_le32(stats.units_written); > + smart_ext.physical_media_units_read[0] = cpu_to_le32(stats.units_read); > + smart_ext.log_page_version = 0x0003; > + smart_ext.log_page_uuid[0] = 0xA4F2BFEA2810AFC5; > + smart_ext.log_page_uuid[1] = 0xAFD514C97C6F4F9C; > + > + if (!rae) { > + nvme_clear_events(n, NVME_AER_TYPE_SMART); > + } > + > + trans_len = MIN(sizeof(smart_ext) - off, buf_len); > + return nvme_c2h(n, (uint8_t *) &smart_ext + off, trans_len, req); > +} > + > static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, > uint64_t off, NvmeRequest *req) > { > @@ -4642,6 +4678,24 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, > return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); > } > > +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, uint8_t rae, > + uint32_t buf_len, uint64_t off, > + NvmeRequest *req) `NvmeCtrl *n` must be first parameter. > +{ > + NvmeSubsystem *subsys = n->subsys; > + switch (lid) { > + case NVME_LOG_VENDOR_START: In this particular case, I think it is more clear if you simply use the hex value directly. The "meaning" of the log page id depends on if or not this is an controller implementing the OCP spec. > + if (subsys->params.ocp) { > + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); > + } > + break; > + /* Add a case for each additional vendor specific log id */ Lower-case the comment. > + } > + > + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); > + return NVME_INVALID_FIELD | NVME_DNR; > +} > + > static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) > { > NvmeCmd *cmd = &req->cmd; > @@ -4683,6 +4737,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) > return nvme_error_info(n, rae, len, off, req); > case NVME_LOG_SMART_INFO: > return nvme_smart_info(n, rae, len, off, req); > + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: > + return nvme_vendor_specific_log(lid, n, rae, len, off, req); > case NVME_LOG_FW_SLOT_INFO: > return nvme_fw_log_info(n, len, off, req); > case NVME_LOG_CHANGED_NSLIST: > diff --git a/include/block/nvme.h b/include/block/nvme.h > index 8027b7126b..2ab0dca529 100644 > --- a/include/block/nvme.h > +++ b/include/block/nvme.h > @@ -978,6 +978,40 @@ typedef struct QEMU_PACKED NvmeSmartLog { > uint8_t reserved2[320]; > } NvmeSmartLog; > > +typedef struct QEMU_PACKED NvmeSmartLogExtended { > + uint64_t physical_media_units_written[2]; > + uint64_t physical_media_units_read[2]; > + uint64_t bad_user_blocks; > + uint64_t bad_system_nand_blocks; > + uint64_t xor_recovery_count; > + uint64_t uncorrectable_read_error_count; > + uint64_t soft_ecc_error_count; > + uint64_t end2end_correction_counts; > + uint8_t system_data_percent_used; > + uint8_t refresh_counts[7]; > + uint64_t user_data_erase_counts; > + uint16_t thermal_throttling_stat_and_count; > + uint16_t dssd_spec_version[3]; > + uint64_t pcie_correctable_error_count; > + uint32_t incomplete_shutdowns; > + uint32_t reserved0; I know that it is not totally consistent across the code, but please use `rsvd<BYTEOFFSET>` for the reserved field names. The above would be `rsvd116` if I am not mistaken. > + uint8_t percent_free_blocks; > + uint8_t reserved1[7]; > + uint16_t capacity_health; > + uint8_t nvme_errata_ver; > + uint8_t reserved2[5]; > + uint64_t unaligned_io; > + uint64_t security_ver_num; > + uint64_t total_nuse; > + uint64_t plp_start_count[2]; > + uint64_t endurance_estimate[2]; > + uint64_t pcie_retraining_count; > + uint64_t power_state_change_count; > + uint8_t reserved3[286]; > + uint16_t log_page_version; > + uint64_t log_page_uuid[2]; > +} NvmeSmartLogExtended; > + > #define NVME_SMART_WARN_MAX 6 > enum NvmeSmartWarn { > NVME_SMART_SPARE = 1 << 0, > @@ -1010,6 +1044,8 @@ enum NvmeLogIdentifier { > NVME_LOG_FW_SLOT_INFO = 0x03, > NVME_LOG_CHANGED_NSLIST = 0x04, > NVME_LOG_CMD_EFFECTS = 0x05, > + NVME_LOG_VENDOR_START = 0xC0, > + NVME_LOG_VENDOR_END = 0xFF, Existing style is generally lower-case hex. > }; > > typedef struct QEMU_PACKED NvmePSD { > -- > 2.30.2 > >
On Tue, Nov 15, 2022 at 12:26:17PM +0100, Klaus Jensen wrote: > On Nov 14 14:50, Joel Granados wrote: > > In order to evaluate write amplification factor (WAF) within the storage > > stack it is important to know the number of bytes written to the > > controller. The existing SMART log value of Data Units Written is too > > coarse (given in units of 500 Kb) and so we add the SMART health > > information extended from the OCP specification (given in units of bytes). > > > > To accomodate different vendor specific specifications like OCP, we add a > > multiplexing function (nvme_vendor_specific_log) which will route to the > > different log functions based on arguments and log ids. We only return the > > OCP extended smart log when the command is 0xC0 and ocp has been turned on > > in the args. > > > > Though we add the whole nvme smart log extended structure, we only populate > > the physical_media_units_{read,written}, log_page_version and > > log_page_uuid. > > > > Signed-off-by: Joel Granados <j.granados@samsung.com> > > > > squash with main > > > > Signed-off-by: Joel Granados <j.granados@samsung.com> > > Looks like you slightly messed up the squash ;) oops. that is my bad > > Also, squash the previous patch (adding the ocp parameter) into this. Here I wanted to keep the introduction of the argument separate. In any case, I'll squash it with the other one. > Please add a note in the documentation (docs/system/devices/nvme.rst) > about this parameter. Of course. I always forget documentation. I'll add it under the "Controller Emulation" section and I'll call it ``ocp`` > > > --- > > hw/nvme/ctrl.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ > > include/block/nvme.h | 36 ++++++++++++++++++++++++++++ > > 2 files changed, 92 insertions(+) > > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c > > index 220683201a..5e6a8150a2 100644 > > --- a/hw/nvme/ctrl.c > > +++ b/hw/nvme/ctrl.c > > @@ -4455,6 +4455,42 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) > > stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; > > } > > > > +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, > > + uint32_t buf_len, uint64_t off, > > + NvmeRequest *req) > > +{ > > + NvmeNamespace *ns = NULL; > > + NvmeSmartLogExtended smart_ext = { 0 }; > > + struct nvme_stats stats = { 0 }; > > + uint32_t trans_len; > > + > > + if (off >= sizeof(smart_ext)) { > > + return NVME_INVALID_FIELD | NVME_DNR; > > + } > > + > > + // Accumulate all stats from all namespaces > > Use /* lower-case and no period */ for one sentence, one line comments. > > I think scripts/checkpatch.pl picks this up. There is a checkpatch like in the kernel. Fantastic! I'll make a note to use it from now on. > > > + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { > > + ns = nvme_ns(n, i); > > + if (ns) > > + { > > Paranthesis go on the same line as the `if`. of course > > > + nvme_set_blk_stats(ns, &stats); > > + } > > + } > > + > > + smart_ext.physical_media_units_written[0] = cpu_to_le32(stats.units_written); > > + smart_ext.physical_media_units_read[0] = cpu_to_le32(stats.units_read); > > + smart_ext.log_page_version = 0x0003; > > + smart_ext.log_page_uuid[0] = 0xA4F2BFEA2810AFC5; > > + smart_ext.log_page_uuid[1] = 0xAFD514C97C6F4F9C; > > + > > + if (!rae) { > > + nvme_clear_events(n, NVME_AER_TYPE_SMART); > > + } > > + > > + trans_len = MIN(sizeof(smart_ext) - off, buf_len); > > + return nvme_c2h(n, (uint8_t *) &smart_ext + off, trans_len, req); > > +} > > + > > static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, > > uint64_t off, NvmeRequest *req) > > { > > @@ -4642,6 +4678,24 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, > > return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); > > } > > > > +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, uint8_t rae, > > + uint32_t buf_len, uint64_t off, > > + NvmeRequest *req) > > `NvmeCtrl *n` must be first parameter. Any reason why this is the case? I'll change it in my code, but would be nice to understand the reason. > > > +{ > > + NvmeSubsystem *subsys = n->subsys; > > + switch (lid) { > > + case NVME_LOG_VENDOR_START: > > In this particular case, I think it is more clear if you simply use the > hex value directly. The "meaning" of the log page id depends on if or > not this is an controller implementing the OCP spec. Agreed > > > + if (subsys->params.ocp) { > > + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); > > + } > > + break; > > + /* Add a case for each additional vendor specific log id */ > > Lower-case the comment. ok > > > + } > > + > > + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); > > + return NVME_INVALID_FIELD | NVME_DNR; > > +} > > + > > static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) > > { > > NvmeCmd *cmd = &req->cmd; > > @@ -4683,6 +4737,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) > > return nvme_error_info(n, rae, len, off, req); > > case NVME_LOG_SMART_INFO: > > return nvme_smart_info(n, rae, len, off, req); > > + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: > > + return nvme_vendor_specific_log(lid, n, rae, len, off, req); > > case NVME_LOG_FW_SLOT_INFO: > > return nvme_fw_log_info(n, len, off, req); > > case NVME_LOG_CHANGED_NSLIST: > > diff --git a/include/block/nvme.h b/include/block/nvme.h > > index 8027b7126b..2ab0dca529 100644 > > --- a/include/block/nvme.h > > +++ b/include/block/nvme.h > > @@ -978,6 +978,40 @@ typedef struct QEMU_PACKED NvmeSmartLog { > > uint8_t reserved2[320]; > > } NvmeSmartLog; > > > > +typedef struct QEMU_PACKED NvmeSmartLogExtended { > > + uint64_t physical_media_units_written[2]; > > + uint64_t physical_media_units_read[2]; > > + uint64_t bad_user_blocks; > > + uint64_t bad_system_nand_blocks; > > + uint64_t xor_recovery_count; > > + uint64_t uncorrectable_read_error_count; > > + uint64_t soft_ecc_error_count; > > + uint64_t end2end_correction_counts; > > + uint8_t system_data_percent_used; > > + uint8_t refresh_counts[7]; > > + uint64_t user_data_erase_counts; > > + uint16_t thermal_throttling_stat_and_count; > > + uint16_t dssd_spec_version[3]; > > + uint64_t pcie_correctable_error_count; > > + uint32_t incomplete_shutdowns; > > + uint32_t reserved0; > > I know that it is not totally consistent across the code, but please use > `rsvd<BYTEOFFSET>` for the reserved field names. The above would be > `rsvd116` if I am not mistaken. ok > > > + uint8_t percent_free_blocks; > > + uint8_t reserved1[7]; > > + uint16_t capacity_health; > > + uint8_t nvme_errata_ver; > > + uint8_t reserved2[5]; > > + uint64_t unaligned_io; > > + uint64_t security_ver_num; > > + uint64_t total_nuse; > > + uint64_t plp_start_count[2]; > > + uint64_t endurance_estimate[2]; > > + uint64_t pcie_retraining_count; > > + uint64_t power_state_change_count; > > + uint8_t reserved3[286]; > > + uint16_t log_page_version; > > + uint64_t log_page_uuid[2]; > > +} NvmeSmartLogExtended; > > + > > #define NVME_SMART_WARN_MAX 6 > > enum NvmeSmartWarn { > > NVME_SMART_SPARE = 1 << 0, > > @@ -1010,6 +1044,8 @@ enum NvmeLogIdentifier { > > NVME_LOG_FW_SLOT_INFO = 0x03, > > NVME_LOG_CHANGED_NSLIST = 0x04, > > NVME_LOG_CMD_EFFECTS = 0x05, > > + NVME_LOG_VENDOR_START = 0xC0, > > + NVME_LOG_VENDOR_END = 0xFF, > > Existing style is generally lower-case hex. No problem Thx for the review. Will post V3 after running checkpatch > > > }; > > > > typedef struct QEMU_PACKED NvmePSD { > > -- > > 2.30.2 > > > >
On Nov 16 17:19, Joel Granados wrote: > On Tue, Nov 15, 2022 at 12:26:17PM +0100, Klaus Jensen wrote: > > On Nov 14 14:50, Joel Granados wrote: > > > > > > +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, uint8_t rae, > > > + uint32_t buf_len, uint64_t off, > > > + NvmeRequest *req) > > > > `NvmeCtrl *n` must be first parameter. > Any reason why this is the case? I'll change it in my code, but would be > nice to understand the reason. > No other reason than consistency with existing code.
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 220683201a..5e6a8150a2 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4455,6 +4455,42 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + NvmeSmartLogExtended smart_ext = { 0 }; + struct nvme_stats stats = { 0 }; + uint32_t trans_len; + + if (off >= sizeof(smart_ext)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + // Accumulate all stats from all namespaces + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) + { + nvme_set_blk_stats(ns, &stats); + } + } + + smart_ext.physical_media_units_written[0] = cpu_to_le32(stats.units_written); + smart_ext.physical_media_units_read[0] = cpu_to_le32(stats.units_read); + smart_ext.log_page_version = 0x0003; + smart_ext.log_page_uuid[0] = 0xA4F2BFEA2810AFC5; + smart_ext.log_page_uuid[1] = 0xAFD514C97C6F4F9C; + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + trans_len = MIN(sizeof(smart_ext) - off, buf_len); + return nvme_c2h(n, (uint8_t *) &smart_ext + off, trans_len, req); +} + static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -4642,6 +4678,24 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, uint8_t rae, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeSubsystem *subsys = n->subsys; + switch (lid) { + case NVME_LOG_VENDOR_START: + if (subsys->params.ocp) { + return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req); + } + break; + /* Add a case for each additional vendor specific log id */ + } + + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = &req->cmd; @@ -4683,6 +4737,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_error_info(n, rae, len, off, req); case NVME_LOG_SMART_INFO: return nvme_smart_info(n, rae, len, off, req); + case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END: + return nvme_vendor_specific_log(lid, n, rae, len, off, req); case NVME_LOG_FW_SLOT_INFO: return nvme_fw_log_info(n, len, off, req); case NVME_LOG_CHANGED_NSLIST: diff --git a/include/block/nvme.h b/include/block/nvme.h index 8027b7126b..2ab0dca529 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -978,6 +978,40 @@ typedef struct QEMU_PACKED NvmeSmartLog { uint8_t reserved2[320]; } NvmeSmartLog; +typedef struct QEMU_PACKED NvmeSmartLogExtended { + uint64_t physical_media_units_written[2]; + uint64_t physical_media_units_read[2]; + uint64_t bad_user_blocks; + uint64_t bad_system_nand_blocks; + uint64_t xor_recovery_count; + uint64_t uncorrectable_read_error_count; + uint64_t soft_ecc_error_count; + uint64_t end2end_correction_counts; + uint8_t system_data_percent_used; + uint8_t refresh_counts[7]; + uint64_t user_data_erase_counts; + uint16_t thermal_throttling_stat_and_count; + uint16_t dssd_spec_version[3]; + uint64_t pcie_correctable_error_count; + uint32_t incomplete_shutdowns; + uint32_t reserved0; + uint8_t percent_free_blocks; + uint8_t reserved1[7]; + uint16_t capacity_health; + uint8_t nvme_errata_ver; + uint8_t reserved2[5]; + uint64_t unaligned_io; + uint64_t security_ver_num; + uint64_t total_nuse; + uint64_t plp_start_count[2]; + uint64_t endurance_estimate[2]; + uint64_t pcie_retraining_count; + uint64_t power_state_change_count; + uint8_t reserved3[286]; + uint16_t log_page_version; + uint64_t log_page_uuid[2]; +} NvmeSmartLogExtended; + #define NVME_SMART_WARN_MAX 6 enum NvmeSmartWarn { NVME_SMART_SPARE = 1 << 0, @@ -1010,6 +1044,8 @@ enum NvmeLogIdentifier { NVME_LOG_FW_SLOT_INFO = 0x03, NVME_LOG_CHANGED_NSLIST = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_VENDOR_START = 0xC0, + NVME_LOG_VENDOR_END = 0xFF, }; typedef struct QEMU_PACKED NvmePSD {