@@ -2621,6 +2621,28 @@ int xc_nvdimm_pmem_get_regions(xc_interface *xch, uint8_t type,
int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
unsigned long smfn, unsigned long emfn);
+/*
+ * Set up the specified PMEM pages for guest data usage. On success,
+ * these PMEM pages can be mapped to guests and used as the backend
+ * of vNVDIMM devices.
+ *
+ * Parameters:
+ * xch: xc interface handle
+ * smfn, emfn: the start and end MFN of the PMEM region
+ *
+ * mgmt_smfn,
+ * mgmt_emfn: the start and end MFN of the PMEM region that is used
+ *            to manage this PMEM region. It must be contained in one
+ *            of the regions added by xc_nvdimm_pmem_setup_mgmt()
+ *            calls, and must not overlap with @smfn - @emfn.
+ *
+ * Return:
+ * On success, return 0. Otherwise, return a non-zero error code.
+ */
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+ unsigned long smfn, unsigned long emfn,
+ unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+
/* Compat shims */
#include "xenctrl_compat.h"
@@ -1019,6 +1019,23 @@ int xc_nvdimm_pmem_setup_mgmt(xc_interface *xch,
return rc;
}
+int xc_nvdimm_pmem_setup_data(xc_interface *xch,
+ unsigned long smfn, unsigned long emfn,
+ unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+ DECLARE_SYSCTL;
+ int rc;
+
+ xc_nvdimm_pmem_setup_common(&sysctl, smfn, emfn, mgmt_smfn, mgmt_emfn);
+ sysctl.u.nvdimm.u.pmem_setup.type = PMEM_REGION_TYPE_DATA;
+
+ rc = do_sysctl(xch, &sysctl);
+ if ( rc && sysctl.u.nvdimm.err )
+ rc = -sysctl.u.nvdimm.err;
+
+ return rc;
+}
+
/*
* Local variables:
* mode: C
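Because the wrapper above converts a hypervisor-reported sysctl.u.nvdimm.err into a negated errno-style return value, a caller can distinguish the failure modes added by this patch. The sketch below is illustrative only: the mapping reflects the error codes the hypervisor-side pmem_setup_data() can return, assuming they are propagated back via sysctl.u.nvdimm.err.

#include <errno.h>
#include <stdio.h>
#include <xenctrl.h>

/* Caller-side diagnostic for the return value of
 * xc_nvdimm_pmem_setup_data(); values match pmem_setup_data(). */
static void explain_setup_data_rc(int rc)
{
    switch ( rc )
    {
    case 0:
        printf("data PMEM region is ready to back vNVDIMM devices\n");
        break;
    case -EINVAL:
        printf("invalid MFN range, or range spans proximity domains\n");
        break;
    case -ENXIO:
        printf("management range is not inside any region registered "
               "with xc_nvdimm_pmem_setup_mgmt()\n");
        break;
    case -ENOSPC:
        printf("management range is already used or too small\n");
        break;
    default:
        printf("setup failed: rc=%d\n", rc);
        break;
    }
}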
@@ -34,16 +34,26 @@ static unsigned int nr_raw_regions;
/*
* All PMEM regions reserved for management purpose are linked to this
* list. All of them must be covered by one or multiple PMEM regions
- * in list pmem_raw_regions.
+ * in list pmem_raw_regions, and not appear in list pmem_data_regions.
*/
static LIST_HEAD(pmem_mgmt_regions);
static DEFINE_SPINLOCK(pmem_mgmt_lock);
static unsigned int nr_mgmt_regions;
+/*
+ * All PMEM regions that can be mapped to guests are linked to this
+ * list. All of them must be covered by one or multiple PMEM regions
+ * in list pmem_raw_regions, and not appear in list pmem_mgmt_regions.
+ */
+static LIST_HEAD(pmem_data_regions);
+static DEFINE_SPINLOCK(pmem_data_lock);
+static unsigned int nr_data_regions;
+
struct pmem {
struct list_head link; /* link to one of PMEM region list */
unsigned long smfn; /* start MFN of the PMEM region */
unsigned long emfn; /* end MFN of the PMEM region */
+ spinlock_t lock;
union {
struct {
@@ -53,6 +63,11 @@ struct pmem {
struct {
unsigned long used; /* # of used pages in MGMT PMEM region */
} mgmt;
+
+ struct {
+ unsigned long mgmt_smfn; /* start MFN of management region */
+ unsigned long mgmt_emfn; /* end MFN of management region */
+ } data;
} u;
};
@@ -111,6 +126,7 @@ static int pmem_list_add(struct list_head *list,
}
new_pmem->smfn = smfn;
new_pmem->emfn = emfn;
+ spin_lock_init(&new_pmem->lock);
list_add(&new_pmem->link, cur);
out:
@@ -261,9 +277,16 @@ static int pmem_get_regions(xen_sysctl_nvdimm_pmem_regions_t *regions)
static bool check_mgmt_size(unsigned long mgmt_mfns, unsigned long total_mfns)
{
- return mgmt_mfns >=
+ unsigned long required =
((sizeof(struct page_info) * total_mfns) >> PAGE_SHIFT) +
((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
+
+ if ( required > mgmt_mfns )
+ printk(XENLOG_DEBUG "PMEM: insufficient management pages, "
+ "0x%lx pages required, 0x%lx pages available\n",
+ required, mgmt_mfns);
+
+ return mgmt_mfns >= required;
}
static bool check_address_and_pxm(unsigned long smfn, unsigned long emfn,
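To get a feel for the numbers behind check_mgmt_size(): assuming the usual x86-64 values of a 32-byte struct page_info, 8-byte machine_to_phys_mapping entries and PAGE_SHIFT == 12 (these are assumptions, not values taken from this patch), the management overhead is roughly 1% of the data region. A standalone sketch of the same arithmetic:

#include <stdio.h>

int main(void)
{
    const unsigned long page_shift = 12;    /* assumed PAGE_SHIFT */
    const unsigned long page_info_sz = 32;  /* assumed sizeof(struct page_info) */
    const unsigned long m2p_entry_sz = 8;   /* assumed sizeof(*machine_to_phys_mapping) */
    const unsigned long total_mfns = 1UL << 20;  /* a 4 GiB data region */

    unsigned long required = ((page_info_sz * total_mfns) >> page_shift) +
                             ((m2p_entry_sz * total_mfns) >> page_shift);

    /* Prints 10240: 40 MiB of management pages for 4 GiB of data PMEM. */
    printf("management pages required: %lu\n", required);
    return 0;
}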
@@ -341,6 +364,93 @@ static int pmem_setup_mgmt(unsigned long smfn, unsigned long emfn)
return rc;
}
+static struct pmem *find_mgmt_region(unsigned long smfn, unsigned long emfn)
+{
+ struct list_head *cur;
+
+ ASSERT(spin_is_locked(&pmem_mgmt_lock));
+
+ list_for_each(cur, &pmem_mgmt_regions)
+ {
+ struct pmem *mgmt = list_entry(cur, struct pmem, link);
+
+ if ( smfn >= mgmt->smfn && emfn <= mgmt->emfn )
+ return mgmt;
+ }
+
+ return NULL;
+}
+
+static int pmem_setup_data(unsigned long smfn, unsigned long emfn,
+ unsigned long mgmt_smfn, unsigned long mgmt_emfn)
+{
+ struct pmem *data, *mgmt = NULL;
+ unsigned long used_mgmt_mfns;
+ unsigned int pxm;
+ int rc;
+
+ if ( smfn == mfn_x(INVALID_MFN) || emfn == mfn_x(INVALID_MFN) ||
+ smfn >= emfn )
+ return -EINVAL;
+
+ /*
+ * Require the PMEM region to be within a single proximity domain, in
+ * order to avoid error recovery from multiple calls to
+ * pmem_arch_setup(), which is not revertible.
+ */
+ if ( !check_address_and_pxm(smfn, emfn, &pxm) )
+ return -EINVAL;
+
+ if ( mgmt_smfn == mfn_x(INVALID_MFN) || mgmt_emfn == mfn_x(INVALID_MFN) ||
+ mgmt_smfn >= mgmt_emfn )
+ return -EINVAL;
+
+ spin_lock(&pmem_mgmt_lock);
+ mgmt = find_mgmt_region(mgmt_smfn, mgmt_emfn);
+ if ( !mgmt )
+ {
+ spin_unlock(&pmem_mgmt_lock);
+ return -ENXIO;
+ }
+ spin_unlock(&pmem_mgmt_lock);
+
+ spin_lock(&mgmt->lock);
+
+ if ( mgmt_smfn < mgmt->smfn + mgmt->u.mgmt.used ||
+ !check_mgmt_size(mgmt_emfn - mgmt_smfn, emfn - smfn) )
+ {
+ spin_unlock(&mgmt->lock);
+ return -ENOSPC;
+ }
+
+ spin_lock(&pmem_data_lock);
+
+ rc = pmem_list_add(&pmem_data_regions, smfn, emfn, &data);
+ if ( rc )
+ goto out;
+ data->u.data.mgmt_smfn = data->u.data.mgmt_emfn = mfn_x(INVALID_MFN);
+
+ rc = pmem_arch_setup(smfn, emfn, pxm,
+ mgmt_smfn, mgmt_emfn, &used_mgmt_mfns);
+ if ( rc )
+ {
+ pmem_list_del(data);
+ goto out;
+ }
+
+ mgmt->u.mgmt.used = mgmt_smfn - mgmt->smfn + used_mgmt_mfns;
+ data->u.data.mgmt_smfn = mgmt_smfn;
+ data->u.data.mgmt_emfn = mgmt->smfn + mgmt->u.mgmt.used;
+
+ nr_data_regions++;
+
+ out:
+ spin_unlock(&pmem_data_lock);
+ spin_unlock(&mgmt->lock);
+
+ return rc;
+}
+
static int pmem_setup(unsigned long smfn, unsigned long emfn,
unsigned long mgmt_smfn, unsigned long mgmt_emfn,
unsigned int type)
@@ -360,6 +470,10 @@ static int pmem_setup(unsigned long smfn, unsigned long emfn,
break;
+ case PMEM_REGION_TYPE_DATA:
+ rc = pmem_setup_data(smfn, emfn, mgmt_smfn, mgmt_emfn);
+ break;
+
default:
rc = -EINVAL;
}
@@ -1121,6 +1121,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_set_parameter_t);
/* Types of PMEM regions */
#define PMEM_REGION_TYPE_RAW 0 /* PMEM regions detected by Xen */
#define PMEM_REGION_TYPE_MGMT 1 /* PMEM regions for management usage */
+#define PMEM_REGION_TYPE_DATA 2 /* PMEM regions for guest data */
/* PMEM_REGION_TYPE_RAW */
struct xen_sysctl_nvdimm_pmem_raw_region {
@@ -1176,7 +1177,7 @@ struct xen_sysctl_nvdimm_pmem_setup {
/* above PMEM region. If the above PMEM region is */
/* a management region, mgmt_{s,e}mfn is required */
/* to be identical to {s,e}mfn. */
- uint8_t type; /* Only PMEM_REGION_TYPE_MGMT is supported now */
+ uint8_t type; /* Must be one of PMEM_REGION_TYPE_{MGMT, DATA} */
};
typedef struct xen_sysctl_nvdimm_pmem_setup xen_sysctl_nvdimm_pmem_setup_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_nvdimm_pmem_setup_t);
Allow the command XEN_SYSCTL_nvdimm_pmem_setup of hypercall
XEN_SYSCTL_nvdimm_op to set up a PMEM region for guest data usage.
After the setup, that PMEM region can be mapped into the guest
address space.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
---
 tools/libxc/include/xenctrl.h |  22 ++++++++
 tools/libxc/xc_misc.c         |  17 ++++++
 xen/common/pmem.c             | 118 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/public/sysctl.h   |   3 +-
 4 files changed, 157 insertions(+), 3 deletions(-)