From patchwork Fri Aug 14 14:52:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Guangrong X-Patchwork-Id: 7015831 Return-Path: X-Original-To: patchwork-kvm@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 2473A9F344 for ; Fri, 14 Aug 2015 14:59:35 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id B8F08207FB for ; Fri, 14 Aug 2015 14:59:33 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 09BB5207F9 for ; Fri, 14 Aug 2015 14:59:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755439AbbHNO7U (ORCPT ); Fri, 14 Aug 2015 10:59:20 -0400 Received: from mga11.intel.com ([192.55.52.93]:31470 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755441AbbHNO6z (ORCPT ); Fri, 14 Aug 2015 10:58:55 -0400 Received: from orsmga001.jf.intel.com ([10.7.209.18]) by fmsmga102.fm.intel.com with ESMTP; 14 Aug 2015 07:58:53 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.15,678,1432623600"; d="scan'208";a="748633447" Received: from xiao.sh.intel.com ([10.239.159.86]) by orsmga001.jf.intel.com with ESMTP; 14 Aug 2015 07:58:35 -0700 From: Xiao Guangrong To: pbonzini@redhat.com, imammedo@redhat.com Cc: gleb@kernel.org, mtosatti@redhat.com, stefanha@redhat.com, mst@redhat.com, rth@twiddle.net, ehabkost@redhat.com, kvm@vger.kernel.org, qemu-devel@nongnu.org, Xiao Guangrong Subject: [PATCH v2 13/18] nvdimm: build namespace config data Date: Fri, 14 Aug 2015 22:52:06 +0800 Message-Id: <1439563931-12352-14-git-send-email-guangrong.xiao@linux.intel.com> X-Mailer: git-send-email 2.4.3 In-Reply-To: <1439563931-12352-1-git-send-email-guangrong.xiao@linux.intel.com> 
References: <1439563931-12352-1-git-send-email-guangrong.xiao@linux.intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Spam-Status: No, score=-7.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP If @configdata is false, Qemu will build a static and readonly namespace in memory and use it serveing for DSM GET_CONFIG_SIZE/GET_CONFIG_DATA requests Signed-off-by: Xiao Guangrong --- hw/mem/Makefile.objs | 3 +- hw/mem/nvdimm/acpi.c | 10 ++ hw/mem/nvdimm/internal.h | 12 ++ hw/mem/nvdimm/namespace.c | 307 +++++++++++++++++++++++++++++++++++++++++++++ include/hw/mem/pc-nvdimm.h | 2 + 5 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 hw/mem/nvdimm/namespace.c diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs index 7a6948d..7f3fab2 100644 --- a/hw/mem/Makefile.objs +++ b/hw/mem/Makefile.objs @@ -1,2 +1,3 @@ common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o -common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o +common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o \ + nvdimm/namespace.o diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c index 0b09efa..c773954 100644 --- a/hw/mem/nvdimm/acpi.c +++ b/hw/mem/nvdimm/acpi.c @@ -240,6 +240,8 @@ static void build_nfit_table(GSList *device_list, char *buf) for (; device_list; device_list = device_list->next) { PCNVDIMMDevice *nvdimm = device_list->data; + struct nfit_memdev *nfit_memdev; + struct nfit_dcr *nfit_dcr; int spa_index, dcr_index; spa_index = ++index; @@ -252,10 +254,15 @@ static void build_nfit_table(GSList *device_list, char *buf) * build Memory Device to System Physical Address Range Mapping * Table. 
*/ + nfit_memdev = (struct nfit_memdev *)buf; buf += build_memdev_table(buf, nvdimm, spa_index, dcr_index); /* build Control Region Descriptor Table. */ + nfit_dcr = (struct nfit_dcr *)buf; buf += build_dcr_table(buf, nvdimm, dcr_index); + + calculate_nvdimm_isetcookie(nvdimm, nfit_memdev->region_spa_offset, + nfit_dcr->serial_number); } } @@ -382,6 +389,9 @@ void pc_nvdimm_build_nfit_table(GArray *table_offsets, GArray *table_data, build_header(linker, table_data, (void *)(table_data->data + nfit_start), "NFIT", table_data->len - nfit_start, 1); + + build_nvdimm_configdata(list); + exit: g_slist_free(list); } diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h index 90d54dc..b1f3f16 100644 --- a/hw/mem/nvdimm/internal.h +++ b/hw/mem/nvdimm/internal.h @@ -13,6 +13,14 @@ #ifndef __NVDIMM_INTERNAL_H #define __NVDIMM_INTERNAL_H +/* #define NVDIMM_DEBUG */ + +#ifdef NVDIMM_DEBUG +#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__) +#else +#define nvdebug(...) +#endif + #define PAGE_SIZE (1UL << 12) typedef struct { @@ -27,4 +35,8 @@ typedef struct { GSList *get_nvdimm_built_list(void); ram_addr_t reserved_range_push(uint64_t size); + +void calculate_nvdimm_isetcookie(PCNVDIMMDevice *nvdimm, uint64_t spa, + uint32_t sn); +void build_nvdimm_configdata(GSList *device_list); #endif diff --git a/hw/mem/nvdimm/namespace.c b/hw/mem/nvdimm/namespace.c new file mode 100644 index 0000000..04626da --- /dev/null +++ b/hw/mem/nvdimm/namespace.c @@ -0,0 +1,307 @@ +/* + * NVDIMM Namespace Support + * + * Copyright(C) 2015 Intel Corporation. + * + * Author: + * Xiao Guangrong + * + * NVDIMM namespace specification can be found at: + * http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
/*
 * fletcher64 - Fletcher-style checksum over a buffer of 32-bit words.
 * @addr: start of the buffer to checksum.
 * @len:  length of the buffer in bytes; a trailing partial word
 *        (len % 4 bytes) is ignored.
 *
 * Every word is decoded as little-endian independent of the host byte
 * order, so the result is the same on all hosts.  The words are added
 * into a 32-bit running sum (wrapping modulo 2^32) and every intermediate
 * running sum is added into a 64-bit accumulator.
 *
 * Returns the accumulator shifted left by 32 bits, OR-ed with the final
 * 32-bit running sum.
 */
static uint64_t fletcher64(const void *addr, size_t len)
{
    const uint8_t *bytes = addr;
    size_t nwords = len / sizeof(uint32_t);
    uint32_t lo32 = 0;
    uint64_t hi32 = 0;
    size_t i;

    for (i = 0; i < nwords; i++) {
        /*
         * Assemble the word byte by byte: no alignment requirement on
         * @addr and no access through an incompatible pointer type.
         */
        const uint8_t *p = bytes + i * sizeof(uint32_t);
        uint32_t word = (uint32_t)p[0] |
                        ((uint32_t)p[1] << 8) |
                        ((uint32_t)p[2] << 16) |
                        ((uint32_t)p[3] << 24);

        lo32 += word;
        hi32 += lo32;
    }

    return hi32 << 32 | lo32;
}
label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct namespace_label_index_block { + uint8_t sig[NSINDEX_SIG_LEN]; + uint32_t flags; + uint32_t seq; + uint64_t myoff; + uint64_t mysize; + uint64_t otheroff; + uint64_t labeloff; + uint32_t nlabel; + uint16_t major; + uint16_t minor; + uint64_t checksum; + uint8_t free[0]; +} QEMU_PACKED; + +/* + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct namespace_label { + uint8_t uuid[NSLABEL_UUID_LEN]; + uint8_t name[NSLABEL_NAME_LEN]; + uint32_t flags; + uint16_t nlabel; + uint16_t position; + uint64_t isetcookie; + uint64_t lbasize; + uint64_t dpa; + uint64_t rawsize; + uint32_t slot; + uint32_t unused; +} QEMU_PACKED; + +/*calculate the number of label can be contained in whole config space. */ +static int config_space_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size) +{ + /* totally we have 2 namespace label index block. */ + if (block_size * 2 >= nvdimm->config_data_size) { + return 0; + } + + return (nvdimm->config_data_size - block_size * 2) / + sizeof(struct namespace_label); +} + +/*calculate the number of label can be contained in index block. 
*/ +static int label_index_block_max_label_nr(size_t block_size) +{ + int free_size; + + free_size = block_size - sizeof(struct namespace_label_index_block); + + return free_size * BITS_PER_BYTE; +} + +static int calculate_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size) +{ + return MIN(label_index_block_max_label_nr(block_size), + config_space_max_label_nr(nvdimm, block_size)); +} + +/* + * check if we can increase the size of namespace_label_index_block to + * contain more labels. + */ +static bool can_increase_index_block(PCNVDIMMDevice *nvdimm, + size_t block_size, int label_nr) +{ + size_t remaining; + + remaining = nvdimm->config_data_size - block_size * 2 - + label_nr * sizeof(struct namespace_label); + + assert((int64_t)remaining >= 0); + + /* can contain 1 label at least. */ + return remaining >= NSINDEX_ALIGN * 2 + sizeof(struct namespace_label); +} + +static void count_label_nr(PCNVDIMMDevice *nvdimm, size_t *label_block_size, + int *label_nr) +{ + *label_block_size = 0; + + do { + /* + * The minimum size of an index block is 256 bytes and the size must + * be a multiple of 256 bytes. 
+ */ + *label_block_size += NSINDEX_ALIGN; + + *label_nr = calculate_max_label_nr(nvdimm, *label_block_size); + } while (can_increase_index_block(nvdimm, *label_block_size, *label_nr)); +} + +static void namespace_label_uuid(PCNVDIMMDevice *nvdimm, void *uuid) +{ + uuid_le label_uuid_init = UUID_LE(0x137e67a9, 0x7dcb, 0x4c66, 0xb2, + 0xe6, 0x05, 0x06, 0x5b, 0xeb, + 0x6a, 0x00); + + assert(nvdimm->device_index <= 0xff); + + label_uuid_init.b[0] += nvdimm->device_index; + memcpy(uuid, &label_uuid_init, sizeof(label_uuid_init)); +} + +static void init_namespace(PCNVDIMMDevice *nvdimm) +{ + struct namespace_label_index_block *index1, *index2; + struct namespace_label *label; + int i; + + size_t label_block_size; + int label_nr; + + assert(!nvdimm->configdata); + + count_label_nr(nvdimm, &label_block_size, &label_nr); + nvdebug("nvdimm%d: label_block_size 0x%lx label_nr %d.\n", + nvdimm->device_index, label_block_size, label_nr); + + index1 = nvdimm->config_data_addr; + + /* + * init the first namespace label index block, except @otheroff + * and @checksum. we will do it later. + */ + memcpy(index1->sig, NSINDEX_SIGNATURE, sizeof(NSINDEX_SIGNATURE)); + index1->flags = cpu_to_le32(0); + index1->seq = cpu_to_le32(0x1); + index1->myoff = cpu_to_le64(0); + index1->mysize = cpu_to_le64(label_block_size); + index1->labeloff = cpu_to_le64(label_block_size * 2); + index1->nlabel = cpu_to_le32(label_nr); + index1->major = cpu_to_le16(NSINDEX_MAJOR); + index1->minor = cpu_to_le16(NSINDEX_MINOR); + index1->checksum = cpu_to_le64(0); + memset(index1->free, 0, + label_block_size - sizeof(struct namespace_label_index_block)); + + /* + * the label slot with the lowest offset in the label storage area is + * tracked by the least significant bit of the first byte of the free + * array. + * + * the fist label is used. + */ + for (i = 1; i < index1->nlabel; i++) { + set_bit(i, (unsigned long *)index1->free); + } + + /* init the second namespace label index block. 
*/ + index2 = (void *)index1 + label_block_size; + memcpy(index2, index1, label_block_size); + index2->seq = cpu_to_le32(0x2); + index2->myoff = cpu_to_le64(label_block_size); + + /* init @otheroff and @checksume. */ + index1->otheroff = cpu_to_le64(index2->myoff); + index2->otheroff = cpu_to_le64(index1->myoff); + index1->checksum = cpu_to_le64(fletcher64(index1, label_block_size)); + index2->checksum = cpu_to_le64(fletcher64(index2, label_block_size)); + + /* only one label is used which is the first label and is readonly. */ + label = nvdimm->config_data_addr + label_block_size * 2; + namespace_label_uuid(nvdimm, label->uuid); + sprintf((char *)label->name, "QEMU NS%d", nvdimm->device_index); + label->flags = cpu_to_le32(NSLABEL_FLAG_ROLABEL); + label->nlabel = cpu_to_le16(1); + label->position = cpu_to_le16(0); + label->isetcookie = cpu_to_le64(nvdimm->isetcookie); + label->lbasize = cpu_to_le64(0); + label->dpa = cpu_to_le64(object_property_get_int(OBJECT(&nvdimm->mr), + "addr", NULL)); + label->rawsize = cpu_to_le64(memory_region_size(&nvdimm->mr)); + label->slot = cpu_to_le32(0); + label->unused = cpu_to_le32(0); + + nvdebug("nvdimm%d, checksum1 0x%lx checksum2 0x%lx isetcookie 0x%lx.\n", + nvdimm->device_index, index1->checksum, index2->checksum, + label->isetcookie); +} + +void build_nvdimm_configdata(GSList *device_list) +{ + for (; device_list; device_list = device_list->next) { + PCNVDIMMDevice *nvdimm = device_list->data; + + if (nvdimm->config_data_addr) { + return; + } + + nvdimm->config_data_addr = g_malloc(nvdimm->config_data_size); + init_namespace(nvdimm); + } +} diff --git a/include/hw/mem/pc-nvdimm.h b/include/hw/mem/pc-nvdimm.h index b7faec3..8aa7086 100644 --- a/include/hw/mem/pc-nvdimm.h +++ b/include/hw/mem/pc-nvdimm.h @@ -28,6 +28,8 @@ typedef struct PCNVDIMMDevice { uint64_t config_data_size; void *config_data_addr; + uint64_t isetcookie; + MemoryRegion mr; } PCNVDIMMDevice;