@@ -1424,6 +1424,9 @@ source "mm/Kconfig"
config X86_PMEM_LEGACY
bool "Support non-standard NVDIMMs and ADR protected memory"
+ depends on PHYS_ADDR_T_64BIT
+ depends on BLK_DEV
+ select LIBNVDIMM
help
Treat memory marked using the non-standard e820 type of 12 as used
by the Intel Sandy Bridge-EP reference BIOS as protected memory.
@@ -1,53 +1,81 @@
/*
* Copyright (c) 2015, Christoph Hellwig.
+ * Copyright (c) 2015, Intel Corporation.
*/
-#include <linux/memblock.h>
#include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
#include <asm/e820.h>
-#include <asm/page_types.h>
-#include <asm/setup.h>
-static __init void register_pmem_device(struct resource *res)
+static void e820_pmem_release(struct device *dev)
{
- struct platform_device *pdev;
- int error;
+ struct nvdimm_bus *nvdimm_bus = dev->platform_data;
- pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
- if (!pdev)
- return;
+ if (nvdimm_bus)
+ nvdimm_bus_unregister(nvdimm_bus);
+}
- error = platform_device_add_resources(pdev, res, 1);
- if (error)
- goto out_put_pdev;
+static struct platform_device e820_pmem = {
+ .name = "e820_pmem",
+ .id = -1,
+ .dev = {
+ .release = e820_pmem_release,
+ },
+};
- error = platform_device_add(pdev);
- if (error)
- goto out_put_pdev;
- return;
+static const struct attribute_group *e820_pmem_attribute_groups[] = {
+ &nvdimm_bus_attribute_group,
+ NULL,
+};
-out_put_pdev:
- dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
- platform_device_put(pdev);
-}
+static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
+ &nd_region_attribute_group,
+ &nd_device_attribute_group,
+ NULL,
+};
-static __init int register_pmem_devices(void)
+static __init int register_e820_pmem(void)
{
- int i;
+ static struct nvdimm_bus_descriptor nd_desc;
+ struct device *dev = &e820_pmem.dev;
+ struct nvdimm_bus *nvdimm_bus;
+ int rc, i;
+
+ rc = platform_device_register(&e820_pmem);
+ if (rc)
+ return rc;
+
+ nd_desc.attr_groups = e820_pmem_attribute_groups;
+ nd_desc.provider_name = "e820";
+ nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
+ if (!nvdimm_bus)
+ goto err;
+ dev->platform_data = nvdimm_bus;
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
+ struct resource res = {
+ .flags = IORESOURCE_MEM,
+ .start = ei->addr,
+ .end = ei->addr + ei->size - 1,
+ };
+ struct nd_region_desc ndr_desc;
+
+ if (ei->type != E820_PRAM)
+ continue;
- if (ei->type == E820_PRAM) {
- struct resource res = {
- .flags = IORESOURCE_MEM,
- .start = ei->addr,
- .end = ei->addr + ei->size - 1,
- };
- register_pmem_device(&res);
- }
+ memset(&ndr_desc, 0, sizeof(ndr_desc));
+ ndr_desc.res = &res;
+ ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+ if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+ goto err;
}
return 0;
+
+ err:
+ dev_err(dev, "failed to register legacy persistent memory ranges\n");
+ platform_device_unregister(&e820_pmem);
+ return -ENXIO;
}
-device_initcall(register_pmem_devices);
+late_initcall(register_e820_pmem);
@@ -404,17 +404,6 @@ config BLK_DEV_RAM_DAX
and will prevent RAM block device backing store memory from being
allocated from highmem (only a problem for highmem systems).
-config BLK_DEV_PMEM
- tristate "Persistent memory block device support"
- help
- Saying Y here will allow you to use a contiguous range of reserved
- memory as one or more persistent block devices.
-
- To compile this driver as a module, choose M here: the module will be
- called 'pmem'.
-
- If unsure, say N.
-
config CDROM_PKTCDVD
tristate "Packet writing on CD/DVD media"
depends on !UML
@@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
-obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
deleted file mode 100644
@@ -1,262 +0,0 @@
-/*
- * Persistent Memory Driver
- *
- * Copyright (c) 2014, Intel Corporation.
- * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
- * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#include <asm/cacheflush.h>
-#include <linux/blkdev.h>
-#include <linux/hdreg.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-
-#define PMEM_MINORS 16
-
-struct pmem_device {
- struct request_queue *pmem_queue;
- struct gendisk *pmem_disk;
-
- /* One contiguous memory region per device */
- phys_addr_t phys_addr;
- void *virt_addr;
- size_t size;
-};
-
-static int pmem_major;
-static atomic_t pmem_index;
-
-static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
- unsigned int len, unsigned int off, int rw,
- sector_t sector)
-{
- void *mem = kmap_atomic(page);
- size_t pmem_off = sector << 9;
-
- if (rw == READ) {
- memcpy(mem + off, pmem->virt_addr + pmem_off, len);
- flush_dcache_page(page);
- } else {
- flush_dcache_page(page);
- memcpy(pmem->virt_addr + pmem_off, mem + off, len);
- }
-
- kunmap_atomic(mem);
-}
-
-static void pmem_make_request(struct request_queue *q, struct bio *bio)
-{
- struct block_device *bdev = bio->bi_bdev;
- struct pmem_device *pmem = bdev->bd_disk->private_data;
- int rw;
- struct bio_vec bvec;
- sector_t sector;
- struct bvec_iter iter;
- int err = 0;
-
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
- err = -EIO;
- goto out;
- }
-
- BUG_ON(bio->bi_rw & REQ_DISCARD);
-
- rw = bio_data_dir(bio);
- sector = bio->bi_iter.bi_sector;
- bio_for_each_segment(bvec, bio, iter) {
- pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
- rw, sector);
- sector += bvec.bv_len >> 9;
- }
-
-out:
- bio_endio(bio, err);
-}
-
-static int pmem_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, int rw)
-{
- struct pmem_device *pmem = bdev->bd_disk->private_data;
-
- pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
- page_endio(page, rw & WRITE, 0);
-
- return 0;
-}
-
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, unsigned long *pfn, long size)
-{
- struct pmem_device *pmem = bdev->bd_disk->private_data;
- size_t offset = sector << 9;
-
- if (!pmem)
- return -ENODEV;
-
- *kaddr = pmem->virt_addr + offset;
- *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
-
- return pmem->size - offset;
-}
-
-static const struct block_device_operations pmem_fops = {
- .owner = THIS_MODULE,
- .rw_page = pmem_rw_page,
- .direct_access = pmem_direct_access,
-};
-
-static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
-{
- struct pmem_device *pmem;
- struct gendisk *disk;
- int idx, err;
-
- err = -ENOMEM;
- pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
- if (!pmem)
- goto out;
-
- pmem->phys_addr = res->start;
- pmem->size = resource_size(res);
-
- err = -EINVAL;
- if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
- dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
- goto out_free_dev;
- }
-
- /*
- * Map the memory as non-cachable, as we can't write back the contents
- * of the CPU caches in case of a crash.
- */
- err = -ENOMEM;
- pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
- if (!pmem->virt_addr)
- goto out_release_region;
-
- pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
- if (!pmem->pmem_queue)
- goto out_unmap;
-
- blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
- blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
- blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
-
- disk = alloc_disk(PMEM_MINORS);
- if (!disk)
- goto out_free_queue;
-
- idx = atomic_inc_return(&pmem_index) - 1;
-
- disk->major = pmem_major;
- disk->first_minor = PMEM_MINORS * idx;
- disk->fops = &pmem_fops;
- disk->private_data = pmem;
- disk->queue = pmem->pmem_queue;
- disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "pmem%d", idx);
- disk->driverfs_dev = dev;
- set_capacity(disk, pmem->size >> 9);
- pmem->pmem_disk = disk;
-
- add_disk(disk);
-
- return pmem;
-
-out_free_queue:
- blk_cleanup_queue(pmem->pmem_queue);
-out_unmap:
- iounmap(pmem->virt_addr);
-out_release_region:
- release_mem_region(pmem->phys_addr, pmem->size);
-out_free_dev:
- kfree(pmem);
-out:
- return ERR_PTR(err);
-}
-
-static void pmem_free(struct pmem_device *pmem)
-{
- del_gendisk(pmem->pmem_disk);
- put_disk(pmem->pmem_disk);
- blk_cleanup_queue(pmem->pmem_queue);
- iounmap(pmem->virt_addr);
- release_mem_region(pmem->phys_addr, pmem->size);
- kfree(pmem);
-}
-
-static int pmem_probe(struct platform_device *pdev)
-{
- struct pmem_device *pmem;
- struct resource *res;
-
- if (WARN_ON(pdev->num_resources > 1))
- return -ENXIO;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENXIO;
-
- pmem = pmem_alloc(&pdev->dev, res);
- if (IS_ERR(pmem))
- return PTR_ERR(pmem);
-
- platform_set_drvdata(pdev, pmem);
-
- return 0;
-}
-
-static int pmem_remove(struct platform_device *pdev)
-{
- struct pmem_device *pmem = platform_get_drvdata(pdev);
-
- pmem_free(pmem);
- return 0;
-}
-
-static struct platform_driver pmem_driver = {
- .probe = pmem_probe,
- .remove = pmem_remove,
- .driver = {
- .owner = THIS_MODULE,
- .name = "pmem",
- },
-};
-
-static int __init pmem_init(void)
-{
- int error;
-
- pmem_major = register_blkdev(0, "pmem");
- if (pmem_major < 0)
- return pmem_major;
-
- error = platform_driver_register(&pmem_driver);
- if (error)
- unregister_blkdev(pmem_major, "pmem");
- return error;
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
- platform_driver_unregister(&pmem_driver);
- unregister_blkdev(pmem_major, "pmem");
-}
-module_exit(pmem_exit);
-
-MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
@@ -1,4 +1,4 @@
-config LIBNVDIMM
+menuconfig LIBNVDIMM
tristate "NVDIMM (Non-Volatile Memory Device) Support"
help
Generic support for non-volatile memory devices including
@@ -11,3 +11,23 @@ config LIBNVDIMM
CONFIG_DAX). A BLK namespace refers to an NVDIMM control
region which exposes an mmio register set for windowed
access mode to non-volatile memory.
+
+if LIBNVDIMM
+
+config BLK_DEV_PMEM
+ tristate "PMEM: Persistent memory block device support"
+ default LIBNVDIMM
+ help
+ Memory ranges for PMEM are described by either an NFIT
+ (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
+ non-standard OEM-specific E820 memory type (type-12, see
+ CONFIG_X86_PMEM_LEGACY), or it is manually specified by the
+ 'memmap=nn[KMG]!ss[KMG]' kernel command line (see
+ Documentation/kernel-parameters.txt). This driver converts
+ these persistent memory ranges into block devices that are
+ capable of DAX (direct-access) file system mappings. See
+ Documentation/blockdev/nd.txt for more details.
+
+ Say Y if you want to use a NVDIMM described by NFIT
+
+endif
@@ -1,4 +1,7 @@
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+
+nd_pmem-y := pmem.o
libnvdimm-y := core.o
libnvdimm-y += bus.o
new file mode 100644
@@ -0,0 +1,260 @@
+/*
+ * Persistent Memory Driver
+ *
+ * Copyright (c) 2014-2015, Intel Corporation.
+ * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
+ * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <asm/cacheflush.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/nd.h>
+#include "nd.h"
+
+#define PMEM_MINORS 16
+
+struct pmem_device {
+ struct request_queue *pmem_queue;
+ struct gendisk *pmem_disk;
+
+ /* One contiguous memory region per device */
+ phys_addr_t phys_addr;
+ void *virt_addr;
+ size_t size;
+};
+
+static int pmem_major;
+
+static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+ unsigned int len, unsigned int off, int rw,
+ sector_t sector)
+{
+ void *mem = kmap_atomic(page);
+ size_t pmem_off = sector << 9;
+
+ if (rw == READ) {
+ memcpy(mem + off, pmem->virt_addr + pmem_off, len);
+ flush_dcache_page(page);
+ } else {
+ flush_dcache_page(page);
+ memcpy(pmem->virt_addr + pmem_off, mem + off, len);
+ }
+
+ kunmap_atomic(mem);
+}
+
+static void pmem_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct block_device *bdev = bio->bi_bdev;
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
+ int rw;
+ struct bio_vec bvec;
+ sector_t sector;
+ struct bvec_iter iter;
+ int err = 0;
+
+ if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
+ err = -EIO;
+ goto out;
+ }
+
+ BUG_ON(bio->bi_rw & REQ_DISCARD);
+
+ rw = bio_data_dir(bio);
+ sector = bio->bi_iter.bi_sector;
+ bio_for_each_segment(bvec, bio, iter) {
+ pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
+ rw, sector);
+ sector += bvec.bv_len >> 9;
+ }
+
+out:
+ bio_endio(bio, err);
+}
+
+static int pmem_rw_page(struct block_device *bdev, sector_t sector,
+ struct page *page, int rw)
+{
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
+
+ pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
+ page_endio(page, rw & WRITE, 0);
+
+ return 0;
+}
+
+static long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, unsigned long *pfn, long size)
+{
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
+ size_t offset = sector << 9;
+
+ if (!pmem)
+ return -ENODEV;
+
+ *kaddr = pmem->virt_addr + offset;
+ *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
+
+ return pmem->size - offset;
+}
+
+static const struct block_device_operations pmem_fops = {
+ .owner = THIS_MODULE,
+ .rw_page = pmem_rw_page,
+ .direct_access = pmem_direct_access,
+};
+
+static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res, int id)
+{
+ struct pmem_device *pmem;
+ struct gendisk *disk;
+ int err;
+
+ err = -ENOMEM;
+ pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
+ if (!pmem)
+ goto out;
+
+ pmem->phys_addr = res->start;
+ pmem->size = resource_size(res);
+
+ err = -EINVAL;
+ if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
+ dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
+ goto out_free_dev;
+ }
+
+ /*
+ * Map the memory as non-cachable, as we can't write back the contents
+ * of the CPU caches in case of a crash.
+ */
+ err = -ENOMEM;
+ pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+ if (!pmem->virt_addr)
+ goto out_release_region;
+
+ pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
+ if (!pmem->pmem_queue)
+ goto out_unmap;
+
+ blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
+ blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
+ blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
+
+ disk = alloc_disk(PMEM_MINORS);
+ if (!disk)
+ goto out_free_queue;
+
+ disk->major = pmem_major;
+ disk->first_minor = PMEM_MINORS * id;
+ disk->fops = &pmem_fops;
+ disk->private_data = pmem;
+ disk->queue = pmem->pmem_queue;
+ disk->flags = GENHD_FL_EXT_DEVT;
+ sprintf(disk->disk_name, "pmem%d", id);
+ disk->driverfs_dev = dev;
+ set_capacity(disk, pmem->size >> 9);
+ pmem->pmem_disk = disk;
+
+ add_disk(disk);
+
+ return pmem;
+
+out_free_queue:
+ blk_cleanup_queue(pmem->pmem_queue);
+out_unmap:
+ iounmap(pmem->virt_addr);
+out_release_region:
+ release_mem_region(pmem->phys_addr, pmem->size);
+out_free_dev:
+ kfree(pmem);
+out:
+ return ERR_PTR(err);
+}
+
+static void pmem_free(struct pmem_device *pmem)
+{
+ del_gendisk(pmem->pmem_disk);
+ put_disk(pmem->pmem_disk);
+ blk_cleanup_queue(pmem->pmem_queue);
+ iounmap(pmem->virt_addr);
+ release_mem_region(pmem->phys_addr, pmem->size);
+ kfree(pmem);
+}
+
+static int nd_pmem_probe(struct device *dev)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+ struct pmem_device *pmem;
+
+ pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
+ if (IS_ERR(pmem))
+ return PTR_ERR(pmem);
+
+ dev_set_drvdata(dev, pmem);
+
+ return 0;
+}
+
+static int nd_pmem_remove(struct device *dev)
+{
+ struct pmem_device *pmem = dev_get_drvdata(dev);
+
+ pmem_free(pmem);
+ return 0;
+}
+
+MODULE_ALIAS("pmem");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
+static struct nd_device_driver nd_pmem_driver = {
+ .probe = nd_pmem_probe,
+ .remove = nd_pmem_remove,
+ .drv = {
+ .name = "nd_pmem",
+ },
+ .type = ND_DRIVER_NAMESPACE_IO,
+};
+
+static int __init pmem_init(void)
+{
+ int error;
+
+ pmem_major = register_blkdev(0, "pmem");
+ if (pmem_major < 0)
+ return pmem_major;
+
+ error = nd_driver_register(&nd_pmem_driver);
+ if (error) {
+ unregister_blkdev(pmem_major, "pmem");
+ return error;
+ }
+
+ return 0;
+}
+module_init(pmem_init);
+
+static void pmem_exit(void)
+{
+ driver_unregister(&nd_pmem_driver.drv);
+ unregister_blkdev(pmem_major, "pmem");
+}
+module_exit(pmem_exit);
+
+MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
+MODULE_LICENSE("GPL v2");