Message ID | 1648537663-126032-3-git-send-email-robert.hu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Init vNVDIMM LSA if applicable | expand |
On Tue, 29 Mar 2022 15:07:43 +0800 Robert Hoo <robert.hu@linux.intel.com> wrote: > Since v2.7, QEMU has supported the emulation of NVDIMM's labels. > With -device nvdimm,...,lsa-size=, the vNVDIMM to guest has this > capability. But if the emulated LSA area isn't initialized, guest Kernel > can't enumerate it correctly. > > This patch is to initialize/format the vNVDIMM's LSA, if it has been > designated the Label capability. The index block format will be v1.1 > initially, in order to obtain maximum compatibility. VM user can later > `ndctl init-label` to make it v1.2 if necessary. [1] Can user initialize/format LSA from guest using ndctl/some other tool? > [1] https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf, > Initial Label Storage Area Configuration: > "for Label Storage Areas of 128KB and 256KB, the corresponding Index > Block size is 256 or 512 bytes." Quick search in above spec says such text doesn't exists. above needs grep-able reference + chapter "x.x name" so one could easily find it. > In driver and ndctl code, they refer to these 2 cases as v1.1 and v1.2. > > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com> > Reviewed-by: Liu, Jingqi <jingqi.liu@intel.com> > --- > Note: most functions in this patch are ported from ndctl and nvdimm driver > code. > --- > hw/mem/nvdimm.c | 359 ++++++++++++++++++++++++++++++++++++++++ > include/hw/mem/nvdimm.h | 104 ++++++++++++ > 2 files changed, 463 insertions(+) > > diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c > index 72cd3041ef..cae7f280d2 100644 > --- a/hw/mem/nvdimm.c > +++ b/hw/mem/nvdimm.c > @@ -25,6 +25,9 @@ > #include "qemu/osdep.h" > #include "qemu/module.h" > #include "qemu/pmem.h" > +#include "qemu/cutils.h" > +#include "qemu/bswap.h" > +#include "qemu/error-report.h" > #include "qapi/error.h" > #include "qapi/visitor.h" > #include "hw/mem/nvdimm.h" > @@ -178,6 +181,348 @@ static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md, > return nvdimm->nvdimm_mr; > } > > +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; > + > +static unsigned inc_seq(unsigned seq) > +{ > + static const unsigned next[] = { 0, 2, 3, 1 }; > + > + return next[seq & 3]; > +} > + > +static u32 best_seq(u32 a, u32 b) > +{ > + a &= NSINDEX_SEQ_MASK; > + b &= NSINDEX_SEQ_MASK; > + > + if (a == 0 || a == b) { > + return b; > + } else if (b == 0) { > + return a; > + } else if (inc_seq(a) == b) { > + return b; > + } else { > + return a; > + } > +} > + > +static size_t __sizeof_namespace_index(u32 nslot) > +{ > + return ALIGN(sizeof(struct namespace_index) + DIV_ROUND_UP(nslot, 8), > + NSINDEX_ALIGN); > +} > + > +static unsigned sizeof_namespace_label(struct NVDIMMDevice *nvdimm) > +{ > + if (nvdimm->label_size == 0) { > + warn_report("NVDIMM label size is 0, default it to 128."); > + nvdimm->label_size = 128; > + } > + return nvdimm->label_size; > +} > + > +static int __nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm, > + size_t index_size) > +{ > + return (nvdimm->lsa_size - index_size * 2) / > + sizeof_namespace_label(nvdimm); > +} > + > +static int nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm) > +{ > + u32 tmp_nslot, n; > + > + tmp_nslot = nvdimm->lsa_size / nvdimm->label_size; > + n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN; > + > + return __nvdimm_num_label_slots(nvdimm, NSINDEX_ALIGN * n); > +} > + > +static unsigned int sizeof_namespace_index(struct NVDIMMDevice *nvdimm) > +{ > + u32 nslot, space, size; > + > + /* > + * Per UEFI 2.7, the minimum size of the Label Storage Area is > + * large enough to hold 2 index blocks and 2 labels. The > + * minimum index block size is 256 bytes, and the minimum label > + * size is 256 bytes. > + */ > + nslot = nvdimm_num_label_slots(nvdimm); > + space = nvdimm->lsa_size - nslot * sizeof_namespace_label(nvdimm); > + size = __sizeof_namespace_index(nslot) * 2; > + if (size <= space && nslot >= 2) { > + return size / 2; > + } > + > + error_report("label area (%ld) too small to host (%d byte) labels", > + nvdimm->lsa_size, sizeof_namespace_label(nvdimm)); > + return 0; > +} > + > +static struct namespace_index *to_namespace_index(struct NVDIMMDevice *nvdimm, > + int i) > +{ > + if (i < 0) { > + return NULL; > + } > + > + return nvdimm->label_data + sizeof_namespace_index(nvdimm) * i; > +} > + > +/* Validate NVDIMM index blocks. Generally refer to driver and ndctl code */ > +static int __nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > +{ > + /* > + * On media label format consists of two index blocks followed > + * by an array of labels. None of these structures are ever > + * updated in place. A sequence number tracks the current > + * active index and the next one to write, while labels are > + * written to free slots. > + * > + * +------------+ > + * | | > + * | nsindex0 | > + * | | > + * +------------+ > + * | | > + * | nsindex1 | > + * | | > + * +------------+ > + * | label0 | > + * +------------+ > + * | label1 | > + * +------------+ > + * | | > + * ....nslot... > + * | | > + * +------------+ > + * | labelN | > + * +------------+ > + */ > + struct namespace_index *nsindex[] = { > + to_namespace_index(nvdimm, 0), > + to_namespace_index(nvdimm, 1), > + }; > + const int num_index = ARRAY_SIZE(nsindex); > + bool valid[2] = { 0 }; > + int i, num_valid = 0; > + u32 seq; > + > + for (i = 0; i < num_index; i++) { > + u32 nslot; > + u8 sig[NSINDEX_SIG_LEN]; > + u64 sum_save, sum, size; > + unsigned int version, labelsize; > + > + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); > + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { > + nvdimm_debug("nsindex%d signature invalid\n", i); > + continue; > + } > + > + /* label sizes larger than 128 arrived with v1.2 */ > + version = le16_to_cpu(nsindex[i]->major) * 100 > + + le16_to_cpu(nsindex[i]->minor); > + if (version >= 102) { > + labelsize = 1 << (7 + nsindex[i]->labelsize); > + } else { > + labelsize = 128; > + } > + > + if (labelsize != sizeof_namespace_label(nvdimm)) { > + nvdimm_debug("nsindex%d labelsize %d invalid\n", > + i, nsindex[i]->labelsize); > + continue; > + } > + > + sum_save = le64_to_cpu(nsindex[i]->checksum); > + nsindex[i]->checksum = cpu_to_le64(0); > + sum = fletcher64(nsindex[i], sizeof_namespace_index(nvdimm), 1); > + nsindex[i]->checksum = cpu_to_le64(sum_save); > + if (sum != sum_save) { > + nvdimm_debug("nsindex%d checksum invalid\n", i); > + continue; > + } > + > + seq = le32_to_cpu(nsindex[i]->seq); > + if ((seq & NSINDEX_SEQ_MASK) == 0) { > + nvdimm_debug("nsindex%d sequence: 0x%x invalid\n", i, seq); > + continue; > + } > + > + /* sanity check the index against expected values */ > + if (le64_to_cpu(nsindex[i]->myoff) != > + i * sizeof_namespace_index(nvdimm)) { > + nvdimm_debug("nsindex%d myoff: 0x%llx invalid\n", > + i, (unsigned long long) > + le64_to_cpu(nsindex[i]->myoff)); > + continue; > + } > + if (le64_to_cpu(nsindex[i]->otheroff) > + != (!i) * sizeof_namespace_index(nvdimm)) { > + nvdimm_debug("nsindex%d otheroff: 0x%llx invalid\n", > + i, (unsigned long long) > + le64_to_cpu(nsindex[i]->otheroff)); > + continue; > + } > + > + size = le64_to_cpu(nsindex[i]->mysize); > + if (size > sizeof_namespace_index(nvdimm) || > + size < sizeof(struct namespace_index)) { > + nvdimm_debug("nsindex%d mysize: 0x%zx invalid\n", i, size); > + continue; > + } > + > + nslot = le32_to_cpu(nsindex[i]->nslot); > + if (nslot * sizeof_namespace_label(nvdimm) + > + 2 * sizeof_namespace_index(nvdimm) > nvdimm->lsa_size) { > + nvdimm_debug("nsindex%d nslot: %u invalid, config_size: 0x%zx\n", > + i, nslot, nvdimm->lsa_size); > + continue; > + } > + valid[i] = true; > + num_valid++; > + } > + > + switch (num_valid) { > + case 0: > + break; > + case 1: > + for (i = 0; i < num_index; i++) > + if (valid[i]) { > + return i; > + } > + /* can't have num_valid > 0 but valid[] = { false, false } */ > + error_report("unexpected index-block parse error"); > + break; > + default: > + /* pick the best index... */ > + seq = best_seq(le32_to_cpu(nsindex[0]->seq), > + le32_to_cpu(nsindex[1]->seq)); > + if (seq == (le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) { > + return 1; > + } else { > + return 0; > + } > + break; > + } > + > + return -1; > +} > + > +static int nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > +{ > + int label_size[] = { 128, 256 }; > + int i, rc; > + > + for (i = 0; i < ARRAY_SIZE(label_size); i++) { > + nvdimm->label_size = label_size[i]; > + rc = __nvdimm_label_validate(nvdimm); > + if (rc >= 0) { > + return rc; > + } > + } > + > + return -1; > +} > + > +static int label_next_nsindex(int index) > +{ > + if (index < 0) { > + return -1; > + } > + > + return (index + 1) % 2; > +} > + > +static void *label_base(struct NVDIMMDevice *nvdimm) > +{ > + void *base = to_namespace_index(nvdimm, 0); > + > + return base + 2 * sizeof_namespace_index(nvdimm); > +} > + > +static int write_label_index(struct NVDIMMDevice *nvdimm, > + enum ndctl_namespace_version ver, unsigned index, unsigned seq) > +{ > + struct namespace_index *nsindex; > + unsigned long offset; > + u64 checksum; > + u32 nslot; > + > + /* > + * We may have initialized ndd to whatever labelsize is > + * currently on the dimm during label_validate(), so we reset it > + * to the desired version here. > + */ > + switch (ver) { > + case NDCTL_NS_VERSION_1_1: > + nvdimm->label_size = 128; > + break; > + case NDCTL_NS_VERSION_1_2: > + nvdimm->label_size = 256; > + break; > + default: > + return -1; > + } > + > + nsindex = to_namespace_index(nvdimm, index); > + nslot = nvdimm_num_label_slots(nvdimm); > + > + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); > + memset(nsindex->flags, 0, 3); > + nsindex->labelsize = sizeof_namespace_label(nvdimm) >> 8; > + nsindex->seq = cpu_to_le32(seq); > + offset = (unsigned long) nsindex > + - (unsigned long) to_namespace_index(nvdimm, 0); > + nsindex->myoff = cpu_to_le64(offset); > + nsindex->mysize = cpu_to_le64(sizeof_namespace_index(nvdimm)); > + offset = (unsigned long) to_namespace_index(nvdimm, > + label_next_nsindex(index)) > + - (unsigned long) to_namespace_index(nvdimm, 0); > + nsindex->otheroff = cpu_to_le64(offset); > + offset = (unsigned long) label_base(nvdimm) > + - (unsigned long) to_namespace_index(nvdimm, 0); > + nsindex->labeloff = cpu_to_le64(offset); > + nsindex->nslot = cpu_to_le32(nslot); > + nsindex->major = cpu_to_le16(1); > + if (sizeof_namespace_label(nvdimm) < 256) { > + nsindex->minor = cpu_to_le16(1); > + } else { > + nsindex->minor = cpu_to_le16(2); > + } > + nsindex->checksum = cpu_to_le64(0); > + /* init label bitmap */ > + memset(nsindex->free, 0xff, ALIGN(nslot, BITS_PER_LONG) / 8); > + checksum = fletcher64(nsindex, sizeof_namespace_index(nvdimm), 1); > + nsindex->checksum = cpu_to_le64(checksum); > + > + return 0; > +} > + > +static int nvdimm_init_label(struct NVDIMMDevice *nvdimm) > +{ > + int i; > + > + for (i = 0; i < 2; i++) { > + int rc; > + > + /* To have most compatibility, we init index block with v1.1 */ > + rc = write_label_index(nvdimm, NDCTL_NS_VERSION_1_1, i, 3 - i); > + > + if (rc < 0) { > + error_report("init No.%d index block failed", i); > + return rc; > + } else { > + nvdimm_debug("%s: dump No.%d index block\n", __func__, i); > + dump_index_block(to_namespace_index(nvdimm, i)); > + } > + } > + > + return 0; > +} > + > static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) > { > NVDIMMDevice *nvdimm = NVDIMM(dimm); > @@ -187,6 +532,20 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) > nvdimm_prepare_memory_region(nvdimm, errp); > } > > + /* When LSA is designaged, validate it. */ > + if (nvdimm->lsa_size != 0) { > + if (buffer_is_zero(nvdimm->label_data, nvdimm->lsa_size) || > + nvdimm_label_validate(nvdimm) < 0) { > + int rc; > + > + info_report("NVDIMM LSA is invalid, needs to be initialized"); > + rc = nvdimm_init_label(nvdimm); > + if (rc < 0) { > + error_report("NVDIMM lsa init failed, rc = %d", rc); > + } > + } > + } > + > if (ndc->realize) { > ndc->realize(nvdimm, errp); > } > diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h > index 8e6a40dc7b..bc1af9248e 100644 > --- a/include/hw/mem/nvdimm.h > +++ b/include/hw/mem/nvdimm.h > @@ -48,14 +48,76 @@ > #define TYPE_NVDIMM "nvdimm" > OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM) > > +typedef uint32_t u32; > +typedef uint64_t u64; > +typedef uint8_t u8; > +typedef uint32_t u32; > + > +#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) > + > #define NVDIMM_LSA_SIZE_PROP "lsa-size" > #define NVDIMM_UUID_PROP "uuid" > #define NVDIMM_UNARMED_PROP "unarmed" > > +enum ndctl_namespace_version { > + NDCTL_NS_VERSION_1_1, > + NDCTL_NS_VERSION_1_2, > +}; > + > +enum { > + NSINDEX_SIG_LEN = 16, > + NSINDEX_ALIGN = 256, > + NSINDEX_SEQ_MASK = 0x3, > + NSLABEL_UUID_LEN = 16, > + NSLABEL_NAME_LEN = 64, > +}; > + > +/** > + * struct namespace_index - label set superblock > + * @sig: NAMESPACE_INDEX\0 > + * @flags: placeholder > + * @labelsize: log2 size (v1 labels 128 bytes v2 labels 256 bytes) > + * @seq: sequence number for this index > + * @myoff: offset of this index in label area > + * @mysize: size of this index struct > + * @otheroff: offset of other index > + * @labeloff: offset of first label slot > + * @nslot: total number of label slots > + * @major: label area major version > + * @minor: label area minor version > + * @checksum: fletcher64 of all fields > + * @free: bitmap, nlabel bits > + * > + * The size of free[] is rounded up so the total struct size is a > + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond > + * nlabel bits must be zero. > + */ > +struct namespace_index { > + uint8_t sig[NSINDEX_SIG_LEN]; > + uint8_t flags[3]; > + uint8_t labelsize; > + uint32_t seq; > + uint64_t myoff; > + uint64_t mysize; > + uint64_t otheroff; > + uint64_t labeloff; > + uint32_t nslot; > + uint16_t major; > + uint16_t minor; > + uint64_t checksum; > + uint8_t free[0]; > +}; > + > struct NVDIMMDevice { > /* private */ > PCDIMMDevice parent_obj; > > + /* > + * Label's size in LSA. Determined by Label version. 128 for v1.1, 256 > + * for v1.2 > + */ > + unsigned int label_size; > + > /* public */ > > /* > @@ -150,6 +212,48 @@ struct NVDIMMState { > }; > typedef struct NVDIMMState NVDIMMState; > > +#if (NVDIMM_DEBUG == 1) > +static inline void dump_index_block(struct namespace_index *nsindex) > +{ > + printf("sig %s\n", nsindex->sig); > + printf("flags 0x%x 0x%x 0x%x\n", nsindex->flags[0], > + nsindex->flags[1], nsindex->flags[2]); > + printf("labelsize %d\n", nsindex->labelsize); > + printf("seq 0x%0x\n", nsindex->seq); > + printf("myoff 0x%"PRIx64"\n", nsindex->myoff); > + printf("mysize 0x%"PRIx64"\n", nsindex->mysize); > + printf("otheroff 0x%"PRIx64"\n", nsindex->otheroff); > + printf("labeloff 0x%"PRIx64"\n", nsindex->labeloff); > + printf("nslot %d\n", nsindex->nslot); > + printf("major %d\n", nsindex->major); > + printf("minor %d\n", nsindex->minor); > + printf("checksum 0x%"PRIx64"\n", nsindex->checksum); > + printf("-------------------------------\n"); > +} > +#else > +static inline void dump_index_block(struct namespace_index *nsindex) > +{ > +} > +#endif > + > +/* > + * Note, fletcher64() is copied from drivers/nvdimm/label.c in the Linux kernel > + */ > +static inline u64 fletcher64(void *addr, size_t len, bool le) > +{ > + u32 *buf = addr; > + u32 lo32 = 0; > + u64 hi32 = 0; > + size_t i; > + > + for (i = 0; i < len / sizeof(u32); i++) { > + lo32 += le ? le32_to_cpu((u32) buf[i]) : buf[i]; > + hi32 += lo32; > + } > + > + return hi32 << 32 | lo32; > +} > + > void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, > struct AcpiGenericAddress dsm_io, > FWCfgState *fw_cfg, Object *owner);
On Thu, 2022-03-31 at 14:09 +0200, Igor Mammedov wrote: > On Tue, 29 Mar 2022 15:07:43 +0800 > Robert Hoo <robert.hu@linux.intel.com> wrote: > > > Since v2.7, QEMU has supported the emulation of NVDIMM's labels. > > With -device nvdimm,...,lsa-size=, the vNVDIMM to guest has this > > capability. But if the emulated LSA area isn't initialized, guest > > Kernel > > can't enumerate it correctly. > > > > This patch is to initialize/format the vNVDIMM's LSA, if it has > > been > > designated the Label capability. The index block format will be > > v1.1 > > initially, in order to obtain maximum compatibility. VM user can > > later > > `ndctl init-label` to make it v1.2 if necessary. [1] > > Can user initialize/format LSA from guest using ndctl/some other > tool? > Yes, he can. But when guest Kernel already told him this is a dimm without label capability, dare/should he take this dangerous action?;-) > > > [1] > > https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf > > , > > Initial Label Storage Area Configuration: > > "for Label Storage Areas of 128KB and 256KB, the corresponding > > Index > > Block size is 256 or 512 bytes." > > Quick search in above spec says such text doesn't exists. Sorry, my carelessness, typo with the ACPI spec link. > > above needs grep-able reference + chapter "x.x name" so one could > easily > find it. Right, accept this. > > > > In driver and ndctl code, they refer to these 2 cases as v1.1 and > > v1.2. > > > > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com> > > Reviewed-by: Liu, Jingqi <jingqi.liu@intel.com> > > --- > > Note: most functions in this patch are ported from ndctl and nvdimm > > driver > > code. > > --- > > hw/mem/nvdimm.c | 359 > > ++++++++++++++++++++++++++++++++++++++++ > > include/hw/mem/nvdimm.h | 104 ++++++++++++ > > 2 files changed, 463 insertions(+) > > > > diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c > > index 72cd3041ef..cae7f280d2 100644 > > --- a/hw/mem/nvdimm.c > > +++ b/hw/mem/nvdimm.c > > @@ -25,6 +25,9 @@ > > #include "qemu/osdep.h" > > #include "qemu/module.h" > > #include "qemu/pmem.h" > > +#include "qemu/cutils.h" > > +#include "qemu/bswap.h" > > +#include "qemu/error-report.h" > > #include "qapi/error.h" > > #include "qapi/visitor.h" > > #include "hw/mem/nvdimm.h" > > @@ -178,6 +181,348 @@ static MemoryRegion > > *nvdimm_md_get_memory_region(MemoryDeviceState *md, > > return nvdimm->nvdimm_mr; > > } > > > > +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; > > + > > +static unsigned inc_seq(unsigned seq) > > +{ > > + static const unsigned next[] = { 0, 2, 3, 1 }; > > + > > + return next[seq & 3]; > > +} > > + > > +static u32 best_seq(u32 a, u32 b) > > +{ > > + a &= NSINDEX_SEQ_MASK; > > + b &= NSINDEX_SEQ_MASK; > > + > > + if (a == 0 || a == b) { > > + return b; > > + } else if (b == 0) { > > + return a; > > + } else if (inc_seq(a) == b) { > > + return b; > > + } else { > > + return a; > > + } > > +} > > + > > +static size_t __sizeof_namespace_index(u32 nslot) > > +{ > > + return ALIGN(sizeof(struct namespace_index) + > > DIV_ROUND_UP(nslot, 8), > > + NSINDEX_ALIGN); > > +} > > + > > +static unsigned sizeof_namespace_label(struct NVDIMMDevice > > *nvdimm) > > +{ > > + if (nvdimm->label_size == 0) { > > + warn_report("NVDIMM label size is 0, default it to 128."); > > + nvdimm->label_size = 128; > > + } > > + return nvdimm->label_size; > > +} > > + > > +static int __nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm, > > + size_t index_size) > > +{ > > + return (nvdimm->lsa_size - index_size * 2) / > > + sizeof_namespace_label(nvdimm); > > +} > > + > > +static int nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm) > > +{ > > + u32 tmp_nslot, n; > > + > > + tmp_nslot = nvdimm->lsa_size / nvdimm->label_size; > > + n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN; > > + > > + return __nvdimm_num_label_slots(nvdimm, NSINDEX_ALIGN * n); > > +} > > + > > +static unsigned int sizeof_namespace_index(struct NVDIMMDevice > > *nvdimm) > > +{ > > + u32 nslot, space, size; > > + > > + /* > > + * Per UEFI 2.7, the minimum size of the Label Storage Area is > > + * large enough to hold 2 index blocks and 2 labels. The > > + * minimum index block size is 256 bytes, and the minimum > > label > > + * size is 256 bytes. > > + */ > > + nslot = nvdimm_num_label_slots(nvdimm); > > + space = nvdimm->lsa_size - nslot * > > sizeof_namespace_label(nvdimm); > > + size = __sizeof_namespace_index(nslot) * 2; > > + if (size <= space && nslot >= 2) { > > + return size / 2; > > + } > > + > > + error_report("label area (%ld) too small to host (%d byte) > > labels", > > + nvdimm->lsa_size, sizeof_namespace_label(nvdimm)); > > + return 0; > > +} > > + > > +static struct namespace_index *to_namespace_index(struct > > NVDIMMDevice *nvdimm, > > + int i) > > +{ > > + if (i < 0) { > > + return NULL; > > + } > > + > > + return nvdimm->label_data + sizeof_namespace_index(nvdimm) * > > i; > > +} > > + > > +/* Validate NVDIMM index blocks. Generally refer to driver and > > ndctl code */ > > +static int __nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > > +{ > > + /* > > + * On media label format consists of two index blocks followed > > + * by an array of labels. None of these structures are ever > > + * updated in place. A sequence number tracks the current > > + * active index and the next one to write, while labels are > > + * written to free slots. > > + * > > + * +------------+ > > + * | | > > + * | nsindex0 | > > + * | | > > + * +------------+ > > + * | | > > + * | nsindex1 | > > + * | | > > + * +------------+ > > + * | label0 | > > + * +------------+ > > + * | label1 | > > + * +------------+ > > + * | | > > + * ....nslot... > > + * | | > > + * +------------+ > > + * | labelN | > > + * +------------+ > > + */ > > + struct namespace_index *nsindex[] = { > > + to_namespace_index(nvdimm, 0), > > + to_namespace_index(nvdimm, 1), > > + }; > > + const int num_index = ARRAY_SIZE(nsindex); > > + bool valid[2] = { 0 }; > > + int i, num_valid = 0; > > + u32 seq; > > + > > + for (i = 0; i < num_index; i++) { > > + u32 nslot; > > + u8 sig[NSINDEX_SIG_LEN]; > > + u64 sum_save, sum, size; > > + unsigned int version, labelsize; > > + > > + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); > > + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) > > { > > + nvdimm_debug("nsindex%d signature invalid\n", i); > > + continue; > > + } > > + > > + /* label sizes larger than 128 arrived with v1.2 */ > > + version = le16_to_cpu(nsindex[i]->major) * 100 > > + + le16_to_cpu(nsindex[i]->minor); > > + if (version >= 102) { > > + labelsize = 1 << (7 + nsindex[i]->labelsize); > > + } else { > > + labelsize = 128; > > + } > > + > > + if (labelsize != sizeof_namespace_label(nvdimm)) { > > + nvdimm_debug("nsindex%d labelsize %d invalid\n", > > + i, nsindex[i]->labelsize); > > + continue; > > + } > > + > > + sum_save = le64_to_cpu(nsindex[i]->checksum); > > + nsindex[i]->checksum = cpu_to_le64(0); > > + sum = fletcher64(nsindex[i], > > sizeof_namespace_index(nvdimm), 1); > > + nsindex[i]->checksum = cpu_to_le64(sum_save); > > + if (sum != sum_save) { > > + nvdimm_debug("nsindex%d checksum invalid\n", i); > > + continue; > > + } > > + > > + seq = le32_to_cpu(nsindex[i]->seq); > > + if ((seq & NSINDEX_SEQ_MASK) == 0) { > > + nvdimm_debug("nsindex%d sequence: 0x%x invalid\n", i, > > seq); > > + continue; > > + } > > + > > + /* sanity check the index against expected values */ > > + if (le64_to_cpu(nsindex[i]->myoff) != > > + i * sizeof_namespace_index(nvdimm)) { > > + nvdimm_debug("nsindex%d myoff: 0x%llx invalid\n", > > + i, (unsigned long long) > > + le64_to_cpu(nsindex[i]->myoff)); > > + continue; > > + } > > + if (le64_to_cpu(nsindex[i]->otheroff) > > + != (!i) * sizeof_namespace_index(nvdimm)) { > > + nvdimm_debug("nsindex%d otheroff: 0x%llx invalid\n", > > + i, (unsigned long long) > > + le64_to_cpu(nsindex[i]->otheroff)); > > + continue; > > + } > > + > > + size = le64_to_cpu(nsindex[i]->mysize); > > + if (size > sizeof_namespace_index(nvdimm) || > > + size < sizeof(struct namespace_index)) { > > + nvdimm_debug("nsindex%d mysize: 0x%zx invalid\n", i, > > size); > > + continue; > > + } > > + > > + nslot = le32_to_cpu(nsindex[i]->nslot); > > + if (nslot * sizeof_namespace_label(nvdimm) + > > + 2 * sizeof_namespace_index(nvdimm) > nvdimm->lsa_size) > > { > > + nvdimm_debug("nsindex%d nslot: %u invalid, > > config_size: 0x%zx\n", > > + i, nslot, nvdimm->lsa_size); > > + continue; > > + } > > + valid[i] = true; > > + num_valid++; > > + } > > + > > + switch (num_valid) { > > + case 0: > > + break; > > + case 1: > > + for (i = 0; i < num_index; i++) > > + if (valid[i]) { > > + return i; > > + } > > + /* can't have num_valid > 0 but valid[] = { false, false } > > */ > > + error_report("unexpected index-block parse error"); > > + break; > > + default: > > + /* pick the best index... */ > > + seq = best_seq(le32_to_cpu(nsindex[0]->seq), > > + le32_to_cpu(nsindex[1]->seq)); > > + if (seq == (le32_to_cpu(nsindex[1]->seq) & > > NSINDEX_SEQ_MASK)) { > > + return 1; > > + } else { > > + return 0; > > + } > > + break; > > + } > > + > > + return -1; > > +} > > + > > +static int nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > > +{ > > + int label_size[] = { 128, 256 }; > > + int i, rc; > > + > > + for (i = 0; i < ARRAY_SIZE(label_size); i++) { > > + nvdimm->label_size = label_size[i]; > > + rc = __nvdimm_label_validate(nvdimm); > > + if (rc >= 0) { > > + return rc; > > + } > > + } > > + > > + return -1; > > +} > > + > > +static int label_next_nsindex(int index) > > +{ > > + if (index < 0) { > > + return -1; > > + } > > + > > + return (index + 1) % 2; > > +} > > + > > +static void *label_base(struct NVDIMMDevice *nvdimm) > > +{ > > + void *base = to_namespace_index(nvdimm, 0); > > + > > + return base + 2 * sizeof_namespace_index(nvdimm); > > +} > > + > > +static int write_label_index(struct NVDIMMDevice *nvdimm, > > + enum ndctl_namespace_version ver, unsigned index, unsigned > > seq) > > +{ > > + struct namespace_index *nsindex; > > + unsigned long offset; > > + u64 checksum; > > + u32 nslot; > > + > > + /* > > + * We may have initialized ndd to whatever labelsize is > > + * currently on the dimm during label_validate(), so we reset > > it > > + * to the desired version here. > > + */ > > + switch (ver) { > > + case NDCTL_NS_VERSION_1_1: > > + nvdimm->label_size = 128; > > + break; > > + case NDCTL_NS_VERSION_1_2: > > + nvdimm->label_size = 256; > > + break; > > + default: > > + return -1; > > + } > > + > > + nsindex = to_namespace_index(nvdimm, index); > > + nslot = nvdimm_num_label_slots(nvdimm); > > + > > + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); > > + memset(nsindex->flags, 0, 3); > > + nsindex->labelsize = sizeof_namespace_label(nvdimm) >> 8; > > + nsindex->seq = cpu_to_le32(seq); > > + offset = (unsigned long) nsindex > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > + nsindex->myoff = cpu_to_le64(offset); > > + nsindex->mysize = cpu_to_le64(sizeof_namespace_index(nvdimm)); > > + offset = (unsigned long) to_namespace_index(nvdimm, > > + label_next_nsindex(index)) > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > + nsindex->otheroff = cpu_to_le64(offset); > > + offset = (unsigned long) label_base(nvdimm) > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > + nsindex->labeloff = cpu_to_le64(offset); > > + nsindex->nslot = cpu_to_le32(nslot); > > + nsindex->major = cpu_to_le16(1); > > + if (sizeof_namespace_label(nvdimm) < 256) { > > + nsindex->minor = cpu_to_le16(1); > > + } else { > > + nsindex->minor = cpu_to_le16(2); > > + } > > + nsindex->checksum = cpu_to_le64(0); > > + /* init label bitmap */ > > + memset(nsindex->free, 0xff, ALIGN(nslot, BITS_PER_LONG) / 8); > > + checksum = fletcher64(nsindex, sizeof_namespace_index(nvdimm), > > 1); > > + nsindex->checksum = cpu_to_le64(checksum); > > + > > + return 0; > > +} > > + > > +static int nvdimm_init_label(struct NVDIMMDevice *nvdimm) > > +{ > > + int i; > > + > > + for (i = 0; i < 2; i++) { > > + int rc; > > + > > + /* To have most compatibility, we init index block with > > v1.1 */ > > + rc = write_label_index(nvdimm, NDCTL_NS_VERSION_1_1, i, 3 > > - i); > > + > > + if (rc < 0) { > > + error_report("init No.%d index block failed", i); > > + return rc; > > + } else { > > + nvdimm_debug("%s: dump No.%d index block\n", __func__, > > i); > > + dump_index_block(to_namespace_index(nvdimm, i)); > > + } > > + } > > + > > + return 0; > > +} > > + > > static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) > > { > > NVDIMMDevice *nvdimm = NVDIMM(dimm); > > @@ -187,6 +532,20 @@ static void nvdimm_realize(PCDIMMDevice *dimm, > > Error **errp) > > nvdimm_prepare_memory_region(nvdimm, errp); > > } > > > > + /* When LSA is designaged, validate it. */ > > + if (nvdimm->lsa_size != 0) { > > + if (buffer_is_zero(nvdimm->label_data, nvdimm->lsa_size) > > || > > + nvdimm_label_validate(nvdimm) < 0) { > > + int rc; > > + > > + info_report("NVDIMM LSA is invalid, needs to be > > initialized"); > > + rc = nvdimm_init_label(nvdimm); > > + if (rc < 0) { > > + error_report("NVDIMM lsa init failed, rc = %d", > > rc); > > + } > > + } > > + } > > + > > if (ndc->realize) { > > ndc->realize(nvdimm, errp); > > } > > diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h > > index 8e6a40dc7b..bc1af9248e 100644 > > --- a/include/hw/mem/nvdimm.h > > +++ b/include/hw/mem/nvdimm.h > > @@ -48,14 +48,76 @@ > > #define TYPE_NVDIMM "nvdimm" > > OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM) > > > > +typedef uint32_t u32; > > +typedef uint64_t u64; > > +typedef uint8_t u8; > > +typedef uint32_t u32; > > + > > +#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) > > + > > #define NVDIMM_LSA_SIZE_PROP "lsa-size" > > #define NVDIMM_UUID_PROP "uuid" > > #define NVDIMM_UNARMED_PROP "unarmed" > > > > +enum ndctl_namespace_version { > > + NDCTL_NS_VERSION_1_1, > > + NDCTL_NS_VERSION_1_2, > > +}; > > + > > +enum { > > + NSINDEX_SIG_LEN = 16, > > + NSINDEX_ALIGN = 256, > > + NSINDEX_SEQ_MASK = 0x3, > > + NSLABEL_UUID_LEN = 16, > > + NSLABEL_NAME_LEN = 64, > > +}; > > + > > +/** > > + * struct namespace_index - label set superblock > > + * @sig: NAMESPACE_INDEX\0 > > + * @flags: placeholder > > + * @labelsize: log2 size (v1 labels 128 bytes v2 labels 256 bytes) > > + * @seq: sequence number for this index > > + * @myoff: offset of this index in label area > > + * @mysize: size of this index struct > > + * @otheroff: offset of other index > > + * @labeloff: offset of first label slot > > + * @nslot: total number of label slots > > + * @major: label area major version > > + * @minor: label area minor version > > + * @checksum: fletcher64 of all fields > > + * @free: bitmap, nlabel bits > > + * > > + * The size of free[] is rounded up so the total struct size is a > > + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates > > beyond > > + * nlabel bits must be zero. > > + */ > > +struct namespace_index { > > + uint8_t sig[NSINDEX_SIG_LEN]; > > + uint8_t flags[3]; > > + uint8_t labelsize; > > + uint32_t seq; > > + uint64_t myoff; > > + uint64_t mysize; > > + uint64_t otheroff; > > + uint64_t labeloff; > > + uint32_t nslot; > > + uint16_t major; > > + uint16_t minor; > > + uint64_t checksum; > > + uint8_t free[0]; > > +}; > > + > > struct NVDIMMDevice { > > /* private */ > > PCDIMMDevice parent_obj; > > > > + /* > > + * Label's size in LSA. Determined by Label version. 128 for > > v1.1, 256 > > + * for v1.2 > > + */ > > + unsigned int label_size; > > + > > /* public */ > > > > /* > > @@ -150,6 +212,48 @@ struct NVDIMMState { > > }; > > typedef struct NVDIMMState NVDIMMState; > > > > +#if (NVDIMM_DEBUG == 1) > > +static inline void dump_index_block(struct namespace_index > > *nsindex) > > +{ > > + printf("sig %s\n", nsindex->sig); > > + printf("flags 0x%x 0x%x 0x%x\n", nsindex->flags[0], > > + nsindex->flags[1], nsindex->flags[2]); > > + printf("labelsize %d\n", nsindex->labelsize); > > + printf("seq 0x%0x\n", nsindex->seq); > > + printf("myoff 0x%"PRIx64"\n", nsindex->myoff); > > + printf("mysize 0x%"PRIx64"\n", nsindex->mysize); > > + printf("otheroff 0x%"PRIx64"\n", nsindex->otheroff); > > + printf("labeloff 0x%"PRIx64"\n", nsindex->labeloff); > > + printf("nslot %d\n", nsindex->nslot); > > + printf("major %d\n", nsindex->major); > > + printf("minor %d\n", nsindex->minor); > > + printf("checksum 0x%"PRIx64"\n", nsindex->checksum); > > + printf("-------------------------------\n"); > > +} > > +#else > > +static inline void dump_index_block(struct namespace_index > > *nsindex) > > +{ > > +} > > +#endif > > + > > +/* > > + * Note, fletcher64() is copied from drivers/nvdimm/label.c in the > > Linux kernel > > + */ > > +static inline u64 fletcher64(void *addr, size_t len, bool le) > > +{ > > + u32 *buf = addr; > > + u32 lo32 = 0; > > + u64 hi32 = 0; > > + size_t i; > > + > > + for (i = 0; i < len / sizeof(u32); i++) { > > + lo32 += le ? le32_to_cpu((u32) buf[i]) : buf[i]; > > + hi32 += lo32; > > + } > > + > > + return hi32 << 32 | lo32; > > +} > > + > > void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, > > struct AcpiGenericAddress dsm_io, > > FWCfgState *fw_cfg, Object *owner); > >
On Thu, 31 Mar 2022 21:08:12 +0800 Robert Hoo <robert.hu@linux.intel.com> wrote: > On Thu, 2022-03-31 at 14:09 +0200, Igor Mammedov wrote: > > On Tue, 29 Mar 2022 15:07:43 +0800 > > Robert Hoo <robert.hu@linux.intel.com> wrote: > > > > > Since v2.7, QEMU has supported the emulation of NVDIMM's labels. > > > With -device nvdimm,...,lsa-size=, the vNVDIMM to guest has this > > > capability. But if the emulated LSA area isn't initialized, guest > > > Kernel > > > can't enumerate it correctly. > > > > > > This patch is to initialize/format the vNVDIMM's LSA, if it has > > > been > > > designated the Label capability. The index block format will be > > > v1.1 > > > initially, in order to obtain maximum compatibility. VM user can > > > later > > > `ndctl init-label` to make it v1.2 if necessary. [1] > > > > Can user initialize/format LSA from guest using ndctl/some other > > tool? > > > Yes, he can. But when guest Kernel already told him this is a dimm > without label capability, dare/should he take this dangerous action?;-) I don't think this feature belongs to QEMU (i.e. hw emulation). It's task that is usually accomplished by firmware or OS (in context of QEMU its guest's responsibility). PS: It's true that QEMU caries some 'firmware' code, like composing ACPI tables but we do it only to reduce QEMU<->firmware ABI necessary for hardware description and that's pretty much it. Unfortunately this series doesn't fit the bill. > > > [1] > > > https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf > > > , > > > Initial Label Storage Area Configuration: > > > "for Label Storage Areas of 128KB and 256KB, the corresponding > > > Index > > > Block size is 256 or 512 bytes." > > > > Quick search in above spec says such text doesn't exists. > > Sorry, my carelessness, typo with the ACPI spec link. > > > > above needs grep-able reference + chapter "x.x name" so one could > > easily > > find it. > Right, accept this. > > > > > > > In driver and ndctl code, they refer to these 2 cases as v1.1 and > > > v1.2. > > > > > > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com> > > > Reviewed-by: Liu, Jingqi <jingqi.liu@intel.com> > > > --- > > > Note: most functions in this patch are ported from ndctl and nvdimm > > > driver > > > code. > > > --- > > > hw/mem/nvdimm.c | 359 > > > ++++++++++++++++++++++++++++++++++++++++ > > > include/hw/mem/nvdimm.h | 104 ++++++++++++ > > > 2 files changed, 463 insertions(+) > > > > > > diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c > > > index 72cd3041ef..cae7f280d2 100644 > > > --- a/hw/mem/nvdimm.c > > > +++ b/hw/mem/nvdimm.c > > > @@ -25,6 +25,9 @@ > > > #include "qemu/osdep.h" > > > #include "qemu/module.h" > > > #include "qemu/pmem.h" > > > +#include "qemu/cutils.h" > > > +#include "qemu/bswap.h" > > > +#include "qemu/error-report.h" > > > #include "qapi/error.h" > > > #include "qapi/visitor.h" > > > #include "hw/mem/nvdimm.h" > > > @@ -178,6 +181,348 @@ static MemoryRegion > > > *nvdimm_md_get_memory_region(MemoryDeviceState *md, > > > return nvdimm->nvdimm_mr; > > > } > > > > > > +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; > > > + > > > +static unsigned inc_seq(unsigned seq) > > > +{ > > > + static const unsigned next[] = { 0, 2, 3, 1 }; > > > + > > > + return next[seq & 3]; > > > +} > > > + > > > +static u32 best_seq(u32 a, u32 b) > > > +{ > > > + a &= NSINDEX_SEQ_MASK; > > > + b &= NSINDEX_SEQ_MASK; > > > + > > > + if (a == 0 || a == b) { > > > + return b; > > > + } else if (b == 0) { > > > + return a; > > > + } else if (inc_seq(a) == b) { > > > + return b; > > > + } else { > > > + return a; > > > + } > > > +} > > > + > > > +static size_t __sizeof_namespace_index(u32 nslot) > > > +{ > > > + return ALIGN(sizeof(struct namespace_index) + > > > DIV_ROUND_UP(nslot, 8), > > > + NSINDEX_ALIGN); > > > +} > > > + > > > +static unsigned sizeof_namespace_label(struct NVDIMMDevice > > > *nvdimm) > > > +{ > > > + if (nvdimm->label_size == 0) { > > > + warn_report("NVDIMM label size is 0, default it to 128."); > > > + nvdimm->label_size = 128; > > > + } > > > + return nvdimm->label_size; > > > +} > > > + > > > +static int __nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm, > > > + size_t index_size) > > > +{ > > > + return (nvdimm->lsa_size - index_size * 2) / > > > + sizeof_namespace_label(nvdimm); > > > +} > > > + > > > +static int nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm) > > > +{ > > > + u32 tmp_nslot, n; > > > + > > > + tmp_nslot = nvdimm->lsa_size / nvdimm->label_size; > > > + n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN; > > > + > > > + return __nvdimm_num_label_slots(nvdimm, NSINDEX_ALIGN * n); > > > +} > > > + > > > +static unsigned int sizeof_namespace_index(struct NVDIMMDevice > > > *nvdimm) > > > +{ > > > + u32 nslot, space, size; > > > + > > > + /* > > > + * Per UEFI 2.7, the minimum size of the Label Storage Area is > > > + * large enough to hold 2 index blocks and 2 labels. The > > > + * minimum index block size is 256 bytes, and the minimum > > > label > > > + * size is 256 bytes. > > > + */ > > > + nslot = nvdimm_num_label_slots(nvdimm); > > > + space = nvdimm->lsa_size - nslot * > > > sizeof_namespace_label(nvdimm); > > > + size = __sizeof_namespace_index(nslot) * 2; > > > + if (size <= space && nslot >= 2) { > > > + return size / 2; > > > + } > > > + > > > + error_report("label area (%ld) too small to host (%d byte) > > > labels", > > > + nvdimm->lsa_size, sizeof_namespace_label(nvdimm)); > > > + return 0; > > > +} > > > + > > > +static struct namespace_index *to_namespace_index(struct > > > NVDIMMDevice *nvdimm, > > > + int i) > > > +{ > > > + if (i < 0) { > > > + return NULL; > > > + } > > > + > > > + return nvdimm->label_data + sizeof_namespace_index(nvdimm) * > > > i; > > > +} > > > + > > > +/* Validate NVDIMM index blocks. Generally refer to driver and > > > ndctl code */ > > > +static int __nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > > > +{ > > > + /* > > > + * On media label format consists of two index blocks followed > > > + * by an array of labels. None of these structures are ever > > > + * updated in place. A sequence number tracks the current > > > + * active index and the next one to write, while labels are > > > + * written to free slots. > > > + * > > > + * +------------+ > > > + * | | > > > + * | nsindex0 | > > > + * | | > > > + * +------------+ > > > + * | | > > > + * | nsindex1 | > > > + * | | > > > + * +------------+ > > > + * | label0 | > > > + * +------------+ > > > + * | label1 | > > > + * +------------+ > > > + * | | > > > + * ....nslot... > > > + * | | > > > + * +------------+ > > > + * | labelN | > > > + * +------------+ > > > + */ > > > + struct namespace_index *nsindex[] = { > > > + to_namespace_index(nvdimm, 0), > > > + to_namespace_index(nvdimm, 1), > > > + }; > > > + const int num_index = ARRAY_SIZE(nsindex); > > > + bool valid[2] = { 0 }; > > > + int i, num_valid = 0; > > > + u32 seq; > > > + > > > + for (i = 0; i < num_index; i++) { > > > + u32 nslot; > > > + u8 sig[NSINDEX_SIG_LEN]; > > > + u64 sum_save, sum, size; > > > + unsigned int version, labelsize; > > > + > > > + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); > > > + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) > > > { > > > + nvdimm_debug("nsindex%d signature invalid\n", i); > > > + continue; > > > + } > > > + > > > + /* label sizes larger than 128 arrived with v1.2 */ > > > + version = le16_to_cpu(nsindex[i]->major) * 100 > > > + + le16_to_cpu(nsindex[i]->minor); > > > + if (version >= 102) { > > > + labelsize = 1 << (7 + nsindex[i]->labelsize); > > > + } else { > > > + labelsize = 128; > > > + } > > > + > > > + if (labelsize != sizeof_namespace_label(nvdimm)) { > > > + nvdimm_debug("nsindex%d labelsize %d invalid\n", > > > + i, nsindex[i]->labelsize); > > > + continue; > > > + } > > > + > > > + sum_save = le64_to_cpu(nsindex[i]->checksum); > > > + nsindex[i]->checksum = cpu_to_le64(0); > > > + sum = fletcher64(nsindex[i], > > > sizeof_namespace_index(nvdimm), 1); > > > + nsindex[i]->checksum = cpu_to_le64(sum_save); > > > + if (sum != sum_save) { > > > + nvdimm_debug("nsindex%d checksum invalid\n", i); > > > + continue; > > > + } > > > + > > > + seq = le32_to_cpu(nsindex[i]->seq); > > > + if ((seq & NSINDEX_SEQ_MASK) == 0) { > > > + nvdimm_debug("nsindex%d sequence: 0x%x invalid\n", i, > > > seq); > > > + continue; > > > + } > > > + > > > + /* sanity check the index against expected values */ > > > + if (le64_to_cpu(nsindex[i]->myoff) != > > > + i * sizeof_namespace_index(nvdimm)) { > > > + nvdimm_debug("nsindex%d myoff: 0x%llx invalid\n", > > > + i, (unsigned long long) > > > + le64_to_cpu(nsindex[i]->myoff)); > > > + continue; > > > + } > > > + if (le64_to_cpu(nsindex[i]->otheroff) > > > + != (!i) * sizeof_namespace_index(nvdimm)) { > > > + nvdimm_debug("nsindex%d otheroff: 0x%llx invalid\n", > > > + i, (unsigned long long) > > > + le64_to_cpu(nsindex[i]->otheroff)); > > > + continue; > > > + } > > > + > > > + size = le64_to_cpu(nsindex[i]->mysize); > > > + if (size > sizeof_namespace_index(nvdimm) || > > > + size < sizeof(struct namespace_index)) { > > > + nvdimm_debug("nsindex%d mysize: 0x%zx invalid\n", i, > > > size); > > > + continue; > > > + } > > > + > > > + nslot = le32_to_cpu(nsindex[i]->nslot); > > > + if (nslot * sizeof_namespace_label(nvdimm) + > > > + 2 * sizeof_namespace_index(nvdimm) > nvdimm->lsa_size) > > > { > > > + nvdimm_debug("nsindex%d nslot: %u invalid, > > > config_size: 0x%zx\n", > > > + i, nslot, nvdimm->lsa_size); > > > + continue; > > > + } > > > + valid[i] = true; > > > + num_valid++; > > > + } > > > + > > > + switch (num_valid) { > > > + case 0: > > > + break; > > > + case 1: > > > + for (i = 0; i < num_index; i++) > > > + if (valid[i]) { > > > + return i; > > > + } > > > + /* can't have num_valid > 0 but valid[] = { false, false } > > > */ > > > + error_report("unexpected index-block parse error"); > > > + break; > > > + default: > > > + /* pick the best index... */ > > > + seq = best_seq(le32_to_cpu(nsindex[0]->seq), > > > + le32_to_cpu(nsindex[1]->seq)); > > > + if (seq == (le32_to_cpu(nsindex[1]->seq) & > > > NSINDEX_SEQ_MASK)) { > > > + return 1; > > > + } else { > > > + return 0; > > > + } > > > + break; > > > + } > > > + > > > + return -1; > > > +} > > > + > > > +static int nvdimm_label_validate(struct NVDIMMDevice *nvdimm) > > > +{ > > > + int label_size[] = { 128, 256 }; > > > + int i, rc; > > > + > > > + for (i = 0; i < ARRAY_SIZE(label_size); i++) { > > > + nvdimm->label_size = label_size[i]; > > > + rc = __nvdimm_label_validate(nvdimm); > > > + if (rc >= 0) { > > > + return rc; > > > + } > > > + } > > > + > > > + return -1; > > > +} > > > + > > > +static int label_next_nsindex(int index) > > > +{ > > > + if (index < 0) { > > > + return -1; > > > + } > > > + > > > + return (index + 1) % 2; > > > +} > > > + > > > +static void *label_base(struct NVDIMMDevice *nvdimm) > > > +{ > > > + void *base = to_namespace_index(nvdimm, 0); > > > + > > > + return base + 2 * sizeof_namespace_index(nvdimm); > > > +} > > > + > > > +static int write_label_index(struct NVDIMMDevice *nvdimm, > > > + enum ndctl_namespace_version ver, unsigned index, unsigned > > > seq) > > > +{ > > > + struct namespace_index *nsindex; > > > + unsigned long offset; > > > + u64 checksum; > > > + u32 nslot; > > > + > > > + /* > > > + * We may have initialized ndd to whatever labelsize is > > > + * currently on the dimm during label_validate(), so we reset > > > it > > > + * to the desired version here. > > > + */ > > > + switch (ver) { > > > + case NDCTL_NS_VERSION_1_1: > > > + nvdimm->label_size = 128; > > > + break; > > > + case NDCTL_NS_VERSION_1_2: > > > + nvdimm->label_size = 256; > > > + break; > > > + default: > > > + return -1; > > > + } > > > + > > > + nsindex = to_namespace_index(nvdimm, index); > > > + nslot = nvdimm_num_label_slots(nvdimm); > > > + > > > + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); > > > + memset(nsindex->flags, 0, 3); > > > + nsindex->labelsize = sizeof_namespace_label(nvdimm) >> 8; > > > + nsindex->seq = cpu_to_le32(seq); > > > + offset = (unsigned long) nsindex > > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > > + nsindex->myoff = cpu_to_le64(offset); > > > + nsindex->mysize = cpu_to_le64(sizeof_namespace_index(nvdimm)); > > > + offset = (unsigned long) to_namespace_index(nvdimm, > > > + label_next_nsindex(index)) > > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > > + nsindex->otheroff = cpu_to_le64(offset); > > > + offset = (unsigned long) label_base(nvdimm) > > > + - (unsigned long) to_namespace_index(nvdimm, 0); > > > + nsindex->labeloff = cpu_to_le64(offset); > > > + nsindex->nslot = cpu_to_le32(nslot); > > > + nsindex->major = cpu_to_le16(1); > > > + if (sizeof_namespace_label(nvdimm) < 256) { > > > + nsindex->minor = cpu_to_le16(1); > > > + } else { > > > + nsindex->minor = cpu_to_le16(2); > > > + } > > > + nsindex->checksum = cpu_to_le64(0); > > > + /* init label bitmap */ > > > + memset(nsindex->free, 0xff, ALIGN(nslot, BITS_PER_LONG) / 8); > > > + checksum = fletcher64(nsindex, sizeof_namespace_index(nvdimm), > > > 1); > > > + nsindex->checksum = cpu_to_le64(checksum); > > > + > > > + return 0; > > > +} > > > + > > > +static int nvdimm_init_label(struct NVDIMMDevice *nvdimm) > > > +{ > > > + int i; > > > + > > > + for (i = 0; i < 2; i++) { > > > + int rc; > > > + > > > + /* To have most compatibility, we init index block with > > > v1.1 */ > > > + rc = write_label_index(nvdimm, NDCTL_NS_VERSION_1_1, i, 3 > > > - i); > > > + > > > + if (rc < 0) { > > > + error_report("init No.%d index block failed", i); > > > + return rc; > > > + } else { > > > + nvdimm_debug("%s: dump No.%d index block\n", __func__, > > > i); > > > + dump_index_block(to_namespace_index(nvdimm, i)); > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > + > > > static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) > > > { > > > NVDIMMDevice *nvdimm = NVDIMM(dimm); > > > @@ -187,6 +532,20 @@ static void nvdimm_realize(PCDIMMDevice *dimm, > > > Error **errp) > > > nvdimm_prepare_memory_region(nvdimm, errp); > > > } > > > > > > + /* When LSA is designaged, validate it. */ > > > + if (nvdimm->lsa_size != 0) { > > > + if (buffer_is_zero(nvdimm->label_data, nvdimm->lsa_size) > > > || > > > + nvdimm_label_validate(nvdimm) < 0) { > > > + int rc; > > > + > > > + info_report("NVDIMM LSA is invalid, needs to be > > > initialized"); > > > + rc = nvdimm_init_label(nvdimm); > > > + if (rc < 0) { > > > + error_report("NVDIMM lsa init failed, rc = %d", > > > rc); > > > + } > > > + } > > > + } > > > + > > > if (ndc->realize) { > > > ndc->realize(nvdimm, errp); > > > } > > > diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h > > > index 8e6a40dc7b..bc1af9248e 100644 > > > --- a/include/hw/mem/nvdimm.h > > > +++ b/include/hw/mem/nvdimm.h > > > @@ -48,14 +48,76 @@ > > > #define TYPE_NVDIMM "nvdimm" > > > OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM) > > > > > > +typedef uint32_t u32; > > > +typedef uint64_t u64; > > > +typedef uint8_t u8; > > > +typedef uint32_t u32; > > > + > > > +#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) > > > + > > > #define NVDIMM_LSA_SIZE_PROP "lsa-size" > > > #define NVDIMM_UUID_PROP "uuid" > > > #define NVDIMM_UNARMED_PROP "unarmed" > > > > > > +enum ndctl_namespace_version { > > > + NDCTL_NS_VERSION_1_1, > > > + NDCTL_NS_VERSION_1_2, > > > +}; > > > + > > > +enum { > > > + NSINDEX_SIG_LEN = 16, > > > + NSINDEX_ALIGN = 256, > > > + NSINDEX_SEQ_MASK = 0x3, > > > + NSLABEL_UUID_LEN = 16, > > > + NSLABEL_NAME_LEN = 64, > > > +}; > > > + > > > +/** > > > + * struct namespace_index - label set superblock > > > + * @sig: NAMESPACE_INDEX\0 > > > + * @flags: placeholder > > > + * @labelsize: log2 size (v1 labels 128 bytes v2 labels 256 bytes) > > > + * @seq: sequence number for this index > > > + * @myoff: offset of this index in label area > > > + * @mysize: size of this index struct > > > + * @otheroff: offset of other index > > > + * @labeloff: offset of first label slot > > > + * @nslot: total number of label slots > > > + * @major: label area major version > > > + * @minor: label area minor version > > > + * @checksum: fletcher64 of all fields > > > + * @free: bitmap, nlabel bits > > > + * > > > + * The size of free[] is rounded up so the total struct size is a > > > + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates > > > beyond > > > + * nlabel bits must be zero. > > > + */ > > > +struct namespace_index { > > > + uint8_t sig[NSINDEX_SIG_LEN]; > > > + uint8_t flags[3]; > > > + uint8_t labelsize; > > > + uint32_t seq; > > > + uint64_t myoff; > > > + uint64_t mysize; > > > + uint64_t otheroff; > > > + uint64_t labeloff; > > > + uint32_t nslot; > > > + uint16_t major; > > > + uint16_t minor; > > > + uint64_t checksum; > > > + uint8_t free[0]; > > > +}; > > > + > > > struct NVDIMMDevice { > > > /* private */ > > > PCDIMMDevice parent_obj; > > > > > > + /* > > > + * Label's size in LSA. Determined by Label version. 128 for > > > v1.1, 256 > > > + * for v1.2 > > > + */ > > > + unsigned int label_size; > > > + > > > /* public */ > > > > > > /* > > > @@ -150,6 +212,48 @@ struct NVDIMMState { > > > }; > > > typedef struct NVDIMMState NVDIMMState; > > > > > > +#if (NVDIMM_DEBUG == 1) > > > +static inline void dump_index_block(struct namespace_index > > > *nsindex) > > > +{ > > > + printf("sig %s\n", nsindex->sig); > > > + printf("flags 0x%x 0x%x 0x%x\n", nsindex->flags[0], > > > + nsindex->flags[1], nsindex->flags[2]); > > > + printf("labelsize %d\n", nsindex->labelsize); > > > + printf("seq 0x%0x\n", nsindex->seq); > > > + printf("myoff 0x%"PRIx64"\n", nsindex->myoff); > > > + printf("mysize 0x%"PRIx64"\n", nsindex->mysize); > > > + printf("otheroff 0x%"PRIx64"\n", nsindex->otheroff); > > > + printf("labeloff 0x%"PRIx64"\n", nsindex->labeloff); > > > + printf("nslot %d\n", nsindex->nslot); > > > + printf("major %d\n", nsindex->major); > > > + printf("minor %d\n", nsindex->minor); > > > + printf("checksum 0x%"PRIx64"\n", nsindex->checksum); > > > + printf("-------------------------------\n"); > > > +} > > > +#else > > > +static inline void dump_index_block(struct namespace_index > > > *nsindex) > > > +{ > > > +} > > > +#endif > > > + > > > +/* > > > + * Note, fletcher64() is copied from drivers/nvdimm/label.c in the > > > Linux kernel > > > + */ > > > +static inline u64 fletcher64(void *addr, size_t len, bool le) > > > +{ > > > + u32 *buf = addr; > > > + u32 lo32 = 0; > > > + u64 hi32 = 0; > > > + size_t i; > > > + > > > + for (i = 0; i < len / sizeof(u32); i++) { > > > + lo32 += le ? le32_to_cpu((u32) buf[i]) : buf[i]; > > > + hi32 += lo32; > > > + } > > > + > > > + return hi32 << 32 | lo32; > > > +} > > > + > > > void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, > > > struct AcpiGenericAddress dsm_io, > > > FWCfgState *fw_cfg, Object *owner); > > > > >
On Thu, 2022-03-31 at 16:41 +0200, Igor Mammedov wrote: > On Thu, 31 Mar 2022 21:08:12 +0800 > Robert Hoo <robert.hu@linux.intel.com> wrote: > > > > > > > Can user initialize/format LSA from guest using ndctl/some other > > > tool? > > > > > > > Yes, he can. But when guest Kernel already told him this is a dimm > > without label capability, dare/should he take this dangerous > > action?;-) > > I don't think this feature belongs to QEMU (i.e. hw emulation). > It's task that is usually accomplished by firmware or OS > (in context of QEMU its guest's responsibility). > Thanks Igor. Actually before I compose this patch, I was pondering on this as well: whose obligation to fulfill this function, i.e. initialize the LSA. So I asked around (and still asking), knowing these about native usage, (correct me if I'm wrong), which we virtualization should mimic in principle: a) before user start to use NVDIMM, he's supposed to ipmctl[0] create goal firstly, to determine 2LM mode or app direct mode, which usually initializes the LSA. So user doesn't necessarily to explicit 'ndctl init-label' although he can do this to init LSA again. b) I heard that, perhaps, even when DIMMs are sent out from factory, it has LSA initialized (not quite certain about this, I'm still confirming). What specs say --- In NVDIMM Namespace spec[1], Chap 2 "Namespaces": "NVDIMM vendors define the size of their label storage area and, therefor, the number of labels it holds." I think: In QEMU context, it's QEMU who's the vNVDIMM's vendor. In UEFI spec [2], "13.19 NVDIMM Label Protocol", page 640: "Before Index Blocks and labels can be utilized, the software managing the Label Storage Area must determine the total number of labels that will be supported and utilizing the description above, calculate the size of the Index Blocks required." I think: In QEMU context, it's QEMU who emulates LSA and therefore the management software of it. What's real limitation on QEMU vNVDIMM implementation --- In VM: ipmctl isn't supported. Only app direct mode is supported. (i.e. no bother to ipmctl create goal first). vNVDIMM is actually presented to user in a ready-to-use initial state. We never tell user you must 'ndctl init-label' then can use it. Nor tell user that you should 'ipmctl create-goal' first, because in fact ipmctl isn't available at all. That's all the story and thoughts before I compose this patch:) [0] https://docs.pmem.io/ipmctl-user-guide/ (and, ipmctl is for Intel Optane PMEM only) [1] https://pmem.io/documents/NVDIMM_Namespace_Spec.pdf [2] https://uefi.org/sites/default/files/resources/UEFI_Spec_2_9_2021_03_18.pdf > > PS: > It's true that QEMU caries some 'firmware' code, like composing > ACPI tables but we do it only to reduce QEMU<->firmware ABI > necessary for hardware description and that's pretty much it. > Unfortunately this series doesn't fit the bill. > Yeah, I've seen this part of code, but a little difficult to comprehend them, especially for me a stranger to ACPI. Where can I find related design document? I now only find a valuable doc: docs/specs/acpi_nvdimm.rst. >
On Fri, 01 Apr 2022 12:07:56 +0800 Robert Hoo <robert.hu@linux.intel.com> wrote: > On Thu, 2022-03-31 at 16:41 +0200, Igor Mammedov wrote: > > On Thu, 31 Mar 2022 21:08:12 +0800 > > Robert Hoo <robert.hu@linux.intel.com> wrote: > > > > > > > > > > Can user initialize/format LSA from guest using ndctl/some other > > > > tool? > > > > > > > > > > Yes, he can. But when guest Kernel already told him this is a dimm > > > without label capability, dare/should he take this dangerous > > > action?;-) > > > > I don't think this feature belongs to QEMU (i.e. hw emulation). > > It's task that is usually accomplished by firmware or OS > > (in context of QEMU its guest's responsibility). > > > > Thanks Igor. > Actually before I compose this patch, I was pondering on this as well: > whose obligation to fulfill this function, i.e. initialize the LSA. > > So I asked around (and still asking), knowing these about native usage, > (correct me if I'm wrong), which we virtualization should mimic in > principle: > > a) before user start to use NVDIMM, he's supposed to ipmctl[0] create > goal firstly, to determine 2LM mode or app direct mode, which usually > initializes the LSA. So user doesn't necessarily to explicit 'ndctl > init-label' although he can do this to init LSA again. > > b) I heard that, perhaps, even when DIMMs are sent out from factory, it > has LSA initialized (not quite certain about this, I'm still > confirming). if you find a NVDIMM that implements initialization in hardware, then it could be considered. But QEMU isn't a factory, it's rather a component within factory that perform specific task while other components manage it (including storage it consumes, see libguestfs project which is similar to what you are trying to do, but deals with conventional storage). > What specs say > --- > In NVDIMM Namespace spec[1], Chap 2 "Namespaces": > "NVDIMM vendors define the size of their label storage area and, > therefor, the number of labels it holds." one does define size and lsa size on QEMU command line, how it will be consumed is the business of firmware/operating system that runs within VM though. > I think: In QEMU context, it's QEMU who's the vNVDIMM's vendor. > > In UEFI spec [2], "13.19 NVDIMM Label Protocol", page 640: > "Before Index Blocks and labels can be utilized, the software managing > the Label Storage Area must determine the total number of labels that > will be supported and utilizing the description above, calculate the > size of the Index Blocks required." > > I think: In QEMU context, it's QEMU who emulates LSA and therefore the > management software of it. > > What's real limitation on QEMU vNVDIMM implementation > --- > In VM: > ipmctl isn't supported. > Only app direct mode is supported. (i.e. no bother to ipmctl create > goal first). > vNVDIMM is actually presented to user in a ready-to-use initial state. > We never tell user you must 'ndctl init-label' then can use it. > Nor tell user that you should 'ipmctl create-goal' first, because in > fact ipmctl isn't available at all. ipmictl isn't hardware, it's tool to connect to firmware running on BMC. In virt world it corresponds to guest code running within VM or some mgmt app outside QEMU that can implement IPMI interface. You can try to generalize this utility and extend EDKII to support it, which would benefit not only QEMU but other consumers of EDKII. wrt IPMI, I'm not familiar with BMC support in QEMU, but looks there are at least some (see hw/ipmi folder) implementations. As for [b] point, QEMU is not software managing NVDIMM, it's hardware emulator. Duplicating irrelevant features in QEMU will just bloat it and make project unmanageable. Point [b] to me looks more like a separate utility that could initialize vNVDIMM for further consumption (I'd ask libguestfs or ndctl folks if they would like to add support for 'out of band' vNVDIMM management, but likely outcome to this would be what libguestfs is doing currently with disks, start VM appliance and run ndctl within it to initialize vNVDIMM). > That's all the story and thoughts before I compose this patch:) > > [0] https://docs.pmem.io/ipmctl-user-guide/ (and, ipmctl is for Intel > Optane PMEM only) > [1] https://pmem.io/documents/NVDIMM_Namespace_Spec.pdf > [2] > https://uefi.org/sites/default/files/resources/UEFI_Spec_2_9_2021_03_18.pdf > > > > > PS: > > It's true that QEMU caries some 'firmware' code, like composing > > ACPI tables but we do it only to reduce QEMU<->firmware ABI > > necessary for hardware description and that's pretty much it. > > Unfortunately this series doesn't fit the bill. > > > Yeah, I've seen this part of code, but a little difficult to comprehend > them, especially for me a stranger to ACPI. Where can I find related > design document? > I now only find a valuable doc: docs/specs/acpi_nvdimm.rst. > > >
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 72cd3041ef..cae7f280d2 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -25,6 +25,9 @@ #include "qemu/osdep.h" #include "qemu/module.h" #include "qemu/pmem.h" +#include "qemu/cutils.h" +#include "qemu/bswap.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/mem/nvdimm.h" @@ -178,6 +181,348 @@ static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md, return nvdimm->nvdimm_mr; } +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +static unsigned inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) { + return b; + } else if (b == 0) { + return a; + } else if (inc_seq(a) == b) { + return b; + } else { + return a; + } +} + +static size_t __sizeof_namespace_index(u32 nslot) +{ + return ALIGN(sizeof(struct namespace_index) + DIV_ROUND_UP(nslot, 8), + NSINDEX_ALIGN); +} + +static unsigned sizeof_namespace_label(struct NVDIMMDevice *nvdimm) +{ + if (nvdimm->label_size == 0) { + warn_report("NVDIMM label size is 0, default it to 128."); + nvdimm->label_size = 128; + } + return nvdimm->label_size; +} + +static int __nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm, + size_t index_size) +{ + return (nvdimm->lsa_size - index_size * 2) / + sizeof_namespace_label(nvdimm); +} + +static int nvdimm_num_label_slots(struct NVDIMMDevice *nvdimm) +{ + u32 tmp_nslot, n; + + tmp_nslot = nvdimm->lsa_size / nvdimm->label_size; + n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN; + + return __nvdimm_num_label_slots(nvdimm, NSINDEX_ALIGN * n); +} + +static unsigned int sizeof_namespace_index(struct NVDIMMDevice *nvdimm) +{ + u32 nslot, space, size; + + /* + * Per UEFI 2.7, the minimum size of the Label Storage Area is + * large enough to hold 2 index blocks and 2 labels. The + * minimum index block size is 256 bytes, and the minimum label + * size is 256 bytes. + */ + nslot = nvdimm_num_label_slots(nvdimm); + space = nvdimm->lsa_size - nslot * sizeof_namespace_label(nvdimm); + size = __sizeof_namespace_index(nslot) * 2; + if (size <= space && nslot >= 2) { + return size / 2; + } + + error_report("label area (%ld) too small to host (%d byte) labels", + nvdimm->lsa_size, sizeof_namespace_label(nvdimm)); + return 0; +} + +static struct namespace_index *to_namespace_index(struct NVDIMMDevice *nvdimm, + int i) +{ + if (i < 0) { + return NULL; + } + + return nvdimm->label_data + sizeof_namespace_index(nvdimm) * i; +} + +/* Validate NVDIMM index blocks. Generally refer to driver and ndctl code */ +static int __nvdimm_label_validate(struct NVDIMMDevice *nvdimm) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. + * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... + * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct namespace_index *nsindex[] = { + to_namespace_index(nvdimm, 0), + to_namespace_index(nvdimm, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + unsigned int version, labelsize; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + nvdimm_debug("nsindex%d signature invalid\n", i); + continue; + } + + /* label sizes larger than 128 arrived with v1.2 */ + version = le16_to_cpu(nsindex[i]->major) * 100 + + le16_to_cpu(nsindex[i]->minor); + if (version >= 102) { + labelsize = 1 << (7 + nsindex[i]->labelsize); + } else { + labelsize = 128; + } + + if (labelsize != sizeof_namespace_label(nvdimm)) { + nvdimm_debug("nsindex%d labelsize %d invalid\n", + i, nsindex[i]->labelsize); + continue; + } + + sum_save = le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = cpu_to_le64(0); + sum = fletcher64(nsindex[i], sizeof_namespace_index(nvdimm), 1); + nsindex[i]->checksum = cpu_to_le64(sum_save); + if (sum != sum_save) { + nvdimm_debug("nsindex%d checksum invalid\n", i); + continue; + } + + seq = le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + nvdimm_debug("nsindex%d sequence: 0x%x invalid\n", i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (le64_to_cpu(nsindex[i]->myoff) != + i * sizeof_namespace_index(nvdimm)) { + nvdimm_debug("nsindex%d myoff: 0x%llx invalid\n", + i, (unsigned long long) + le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(nvdimm)) { + nvdimm_debug("nsindex%d otheroff: 0x%llx invalid\n", + i, (unsigned long long) + le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(nvdimm) || + size < sizeof(struct namespace_index)) { + nvdimm_debug("nsindex%d mysize: 0x%zx invalid\n", i, size); + continue; + } + + nslot = le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof_namespace_label(nvdimm) + + 2 * sizeof_namespace_index(nvdimm) > nvdimm->lsa_size) { + nvdimm_debug("nsindex%d nslot: %u invalid, config_size: 0x%zx\n", + i, nslot, nvdimm->lsa_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) { + return i; + } + /* can't have num_valid > 0 but valid[] = { false, false } */ + error_report("unexpected index-block parse error"); + break; + default: + /* pick the best index... */ + seq = best_seq(le32_to_cpu(nsindex[0]->seq), + le32_to_cpu(nsindex[1]->seq)); + if (seq == (le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) { + return 1; + } else { + return 0; + } + break; + } + + return -1; +} + +static int nvdimm_label_validate(struct NVDIMMDevice *nvdimm) +{ + int label_size[] = { 128, 256 }; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(label_size); i++) { + nvdimm->label_size = label_size[i]; + rc = __nvdimm_label_validate(nvdimm); + if (rc >= 0) { + return rc; + } + } + + return -1; +} + +static int label_next_nsindex(int index) +{ + if (index < 0) { + return -1; + } + + return (index + 1) % 2; +} + +static void *label_base(struct NVDIMMDevice *nvdimm) +{ + void *base = to_namespace_index(nvdimm, 0); + + return base + 2 * sizeof_namespace_index(nvdimm); +} + +static int write_label_index(struct NVDIMMDevice *nvdimm, + enum ndctl_namespace_version ver, unsigned index, unsigned seq) +{ + struct namespace_index *nsindex; + unsigned long offset; + u64 checksum; + u32 nslot; + + /* + * We may have initialized ndd to whatever labelsize is + * currently on the dimm during label_validate(), so we reset it + * to the desired version here. + */ + switch (ver) { + case NDCTL_NS_VERSION_1_1: + nvdimm->label_size = 128; + break; + case NDCTL_NS_VERSION_1_2: + nvdimm->label_size = 256; + break; + default: + return -1; + } + + nsindex = to_namespace_index(nvdimm, index); + nslot = nvdimm_num_label_slots(nvdimm); + + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); + memset(nsindex->flags, 0, 3); + nsindex->labelsize = sizeof_namespace_label(nvdimm) >> 8; + nsindex->seq = cpu_to_le32(seq); + offset = (unsigned long) nsindex + - (unsigned long) to_namespace_index(nvdimm, 0); + nsindex->myoff = cpu_to_le64(offset); + nsindex->mysize = cpu_to_le64(sizeof_namespace_index(nvdimm)); + offset = (unsigned long) to_namespace_index(nvdimm, + label_next_nsindex(index)) + - (unsigned long) to_namespace_index(nvdimm, 0); + nsindex->otheroff = cpu_to_le64(offset); + offset = (unsigned long) label_base(nvdimm) + - (unsigned long) to_namespace_index(nvdimm, 0); + nsindex->labeloff = cpu_to_le64(offset); + nsindex->nslot = cpu_to_le32(nslot); + nsindex->major = cpu_to_le16(1); + if (sizeof_namespace_label(nvdimm) < 256) { + nsindex->minor = cpu_to_le16(1); + } else { + nsindex->minor = cpu_to_le16(2); + } + nsindex->checksum = cpu_to_le64(0); + /* init label bitmap */ + memset(nsindex->free, 0xff, ALIGN(nslot, BITS_PER_LONG) / 8); + checksum = fletcher64(nsindex, sizeof_namespace_index(nvdimm), 1); + nsindex->checksum = cpu_to_le64(checksum); + + return 0; +} + +static int nvdimm_init_label(struct NVDIMMDevice *nvdimm) +{ + int i; + + for (i = 0; i < 2; i++) { + int rc; + + /* To have most compatibility, we init index block with v1.1 */ + rc = write_label_index(nvdimm, NDCTL_NS_VERSION_1_1, i, 3 - i); + + if (rc < 0) { + error_report("init No.%d index block failed", i); + return rc; + } else { + nvdimm_debug("%s: dump No.%d index block\n", __func__, i); + dump_index_block(to_namespace_index(nvdimm, i)); + } + } + + return 0; +} + static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) { NVDIMMDevice *nvdimm = NVDIMM(dimm); @@ -187,6 +532,20 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) nvdimm_prepare_memory_region(nvdimm, errp); } + /* When LSA is designaged, validate it. */ + if (nvdimm->lsa_size != 0) { + if (buffer_is_zero(nvdimm->label_data, nvdimm->lsa_size) || + nvdimm_label_validate(nvdimm) < 0) { + int rc; + + info_report("NVDIMM LSA is invalid, needs to be initialized"); + rc = nvdimm_init_label(nvdimm); + if (rc < 0) { + error_report("NVDIMM lsa init failed, rc = %d", rc); + } + } + } + if (ndc->realize) { ndc->realize(nvdimm, errp); } diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 8e6a40dc7b..bc1af9248e 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -48,14 +48,76 @@ #define TYPE_NVDIMM "nvdimm" OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM) +typedef uint32_t u32; +typedef uint64_t u64; +typedef uint8_t u8; +typedef uint32_t u32; + +#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) + #define NVDIMM_LSA_SIZE_PROP "lsa-size" #define NVDIMM_UUID_PROP "uuid" #define NVDIMM_UNARMED_PROP "unarmed" +enum ndctl_namespace_version { + NDCTL_NS_VERSION_1_1, + NDCTL_NS_VERSION_1_2, +}; + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, +}; + +/** + * struct namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @labelsize: log2 size (v1 labels 128 bytes v2 labels 256 bytes) + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct namespace_index { + uint8_t sig[NSINDEX_SIG_LEN]; + uint8_t flags[3]; + uint8_t labelsize; + uint32_t seq; + uint64_t myoff; + uint64_t mysize; + uint64_t otheroff; + uint64_t labeloff; + uint32_t nslot; + uint16_t major; + uint16_t minor; + uint64_t checksum; + uint8_t free[0]; +}; + struct NVDIMMDevice { /* private */ PCDIMMDevice parent_obj; + /* + * Label's size in LSA. Determined by Label version. 128 for v1.1, 256 + * for v1.2 + */ + unsigned int label_size; + /* public */ /* @@ -150,6 +212,48 @@ struct NVDIMMState { }; typedef struct NVDIMMState NVDIMMState; +#if (NVDIMM_DEBUG == 1) +static inline void dump_index_block(struct namespace_index *nsindex) +{ + printf("sig %s\n", nsindex->sig); + printf("flags 0x%x 0x%x 0x%x\n", nsindex->flags[0], + nsindex->flags[1], nsindex->flags[2]); + printf("labelsize %d\n", nsindex->labelsize); + printf("seq 0x%0x\n", nsindex->seq); + printf("myoff 0x%"PRIx64"\n", nsindex->myoff); + printf("mysize 0x%"PRIx64"\n", nsindex->mysize); + printf("otheroff 0x%"PRIx64"\n", nsindex->otheroff); + printf("labeloff 0x%"PRIx64"\n", nsindex->labeloff); + printf("nslot %d\n", nsindex->nslot); + printf("major %d\n", nsindex->major); + printf("minor %d\n", nsindex->minor); + printf("checksum 0x%"PRIx64"\n", nsindex->checksum); + printf("-------------------------------\n"); +} +#else +static inline void dump_index_block(struct namespace_index *nsindex) +{ +} +#endif + +/* + * Note, fletcher64() is copied from drivers/nvdimm/label.c in the Linux kernel + */ +static inline u64 fletcher64(void *addr, size_t len, bool le) +{ + u32 *buf = addr; + u32 lo32 = 0; + u64 hi32 = 0; + size_t i; + + for (i = 0; i < len / sizeof(u32); i++) { + lo32 += le ? le32_to_cpu((u32) buf[i]) : buf[i]; + hi32 += lo32; + } + + return hi32 << 32 | lo32; +} + void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, struct AcpiGenericAddress dsm_io, FWCfgState *fw_cfg, Object *owner);