diff mbox

[3/3] pmem: Use the poison list to expose badblocks

Message ID 1450603122-7205-4-git-send-email-vishal@kernel.org (mailing list archive)
State Superseded
Headers show

Commit Message

Vishal Verma Dec. 20, 2015, 9:18 a.m. UTC
From: Vishal Verma <vishal.l.verma@intel.com>

Enable the gendisk badblocks feature for pmem namespaces.
If the pmem namespace being created has any known poison associated with
its physical address space, convert the poison ranges to bad sectors
exposed using the badblocks interface.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

Comments

kernel test robot Dec. 20, 2015, 9:31 a.m. UTC | #1
Hi Vishal,

[auto build test ERROR on linux-nvdimm/libnvdimm-for-next]
[also build test ERROR on v4.4-rc5 next-20151218]

url:    https://github.com/0day-ci/linux/commits/vishal-kernel-org/Expose-known-poison-in-SPA-ranges-to-the-block-layer/20151220-172142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm libnvdimm-for-next
config: x86_64-randconfig-x017-201551 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/nvdimm/pmem.c: In function 'pmem_add_badblock_range':
>> drivers/nvdimm/pmem.c:201:9: error: implicit declaration of function 'disk_set_badblocks' [-Werror=implicit-function-declaration]
       rc = disk_set_badblocks(disk, s, done);
            ^
   drivers/nvdimm/pmem.c: In function 'pmem_attach_disk':
>> drivers/nvdimm/pmem.c:314:8: error: implicit declaration of function 'disk_alloc_badblocks' [-Werror=implicit-function-declaration]
     ret = disk_alloc_badblocks(disk);
           ^
   cc1: some warnings being treated as errors

vim +/disk_set_badblocks +201 drivers/nvdimm/pmem.c

   195			sector_t s = start_sector;
   196			int rc;
   197	
   198			while (remaining) {
   199				int done = min_t(u64, remaining, INT_MAX);
   200	
 > 201				rc = disk_set_badblocks(disk, s, done);
   202				if (rc)
   203					return rc;
   204				remaining -= done;
   205				s += done;
   206			}
   207			return 0;
   208		} else
   209			return disk_set_badblocks(disk, start_sector, num_sectors);
   210	}
   211	
   212	/*
   213	 * The poison list generated during NFIT initialization may contain multiple,
   214	 * possibly overlapping ranges in the SPA (System Physical Address) space.
   215	 * Compare each of these ranges to the pmem namespace currently being
   216	 * initialized, and add badblocks for the sub-ranges that match
   217	 */
   218	static int pmem_add_poison(struct gendisk *disk, struct nvdimm_bus *nvdimm_bus)
   219	{
   220		struct pmem_device *pmem = disk->private_data;
   221		struct list_head *poison_list;
   222		struct nd_poison *pl;
   223		int rc;
   224	
   225		poison_list = nvdimm_bus_get_poison_list(nvdimm_bus);
   226		if (list_empty(poison_list))
   227			return 0;
   228	
   229		list_for_each_entry(pl, poison_list, list) {
   230			u64 pl_end = pl->start + pl->length - 1;
   231			u64 pmem_end = pmem->phys_addr + pmem->size - 1;
   232	
   233			/* Discard intervals with no intersection */
   234			if (pl_end < pmem->phys_addr)
   235				continue;
   236			if (pl->start > pmem_end)
   237				continue;
   238			/* Deal with any overlap after start of the pmem range */
   239			if (pl->start >= pmem->phys_addr) {
   240				u64 start = pl->start;
   241				u64 len;
   242	
   243				if (pl_end <= pmem_end)
   244					len = pl->length;
   245				else
   246					len = pmem->phys_addr + pmem->size - pl->start;
   247	
   248				rc = pmem_add_badblock_range(disk, start, len);
   249				if (rc)
   250					return rc;
   251				dev_info(&nvdimm_bus->dev,
   252					"Found a poison range (0x%llx, 0x%llx)\n",
   253					start, len);
   254				continue;
   255			}
   256			/* Deal with overlap for poison starting before pmem range */
   257			if (pl->start < pmem->phys_addr) {
   258				u64 start = pmem->phys_addr;
   259				u64 len;
   260	
   261				if (pl_end < pmem_end)
   262					len = pl->start + pl->length - pmem->phys_addr;
   263				else
   264					len = pmem->size;
   265	
   266				rc = pmem_add_badblock_range(disk, start, len);
   267				if (rc)
   268					return rc;
   269				dev_info(&nvdimm_bus->dev,
   270					"Found a poison range (0x%llx, 0x%llx)\n",
   271					start, len);
   272			}
   273		}
   274	
   275		return 0;
   276	}
   277	
   278	static int pmem_attach_disk(struct device *dev,
   279			struct nd_namespace_common *ndns, struct pmem_device *pmem)
   280	{
   281		struct nd_region *nd_region = to_nd_region(dev->parent);
   282		struct nvdimm_bus *nvdimm_bus;
   283		int nid = dev_to_node(dev);
   284		struct gendisk *disk;
   285		int ret;
   286	
   287		pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
   288		if (!pmem->pmem_queue)
   289			return -ENOMEM;
   290	
   291		blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
   292		blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
   293		blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
   294		blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
   295		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
   296	
   297		disk = alloc_disk_node(0, nid);
   298		if (!disk) {
   299			blk_cleanup_queue(pmem->pmem_queue);
   300			return -ENOMEM;
   301		}
   302	
   303		disk->major		= pmem_major;
   304		disk->first_minor	= 0;
   305		disk->fops		= &pmem_fops;
   306		disk->private_data	= pmem;
   307		disk->queue		= pmem->pmem_queue;
   308		disk->flags		= GENHD_FL_EXT_DEVT;
   309		nvdimm_namespace_disk_name(ndns, disk->disk_name);
   310		disk->driverfs_dev = dev;
   311		set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
   312		pmem->pmem_disk = disk;
   313	
 > 314		ret = disk_alloc_badblocks(disk);
   315		if (ret)
   316			return ret;
   317	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Dan Williams Dec. 21, 2015, 1:20 a.m. UTC | #2
On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
> From: Vishal Verma <vishal.l.verma@intel.com>
>
> Enable the gendisk badblocks feature for pmem namespaces.
> If the pmem namespace being created has any known poison associated with
> its physical address space, convert the poison ranges to bad sectors
> exposed using the badblocks interface.
>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 124 insertions(+)

I think we should move this new functionality to the core because
there is not much pmem driver specific.  It's all generic nvdimm-core
and block-core functionality.  The only missing information the core
routine needs is the gendisk and a data offset (if sector-zero is at
an offset from the base address range of the namespace).  Something
like:

nvdimm_namespace_disk_poison(struct nd_namespace_common *ndns,
    resource_size_t offset, struct gendisk *disk)
Verma, Vishal L Dec. 21, 2015, 6:50 p.m. UTC | #3
On Sun, 2015-12-20 at 17:20 -0800, Dan Williams wrote:
> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:

> > From: Vishal Verma <vishal.l.verma@intel.com>

> > 

> > Enable the gendisk badblocks feature for pmem namespaces.

> > If the pmem namespace being created has any known poison associated

> > with

> > its physical address space, convert the poison ranges to bad sectors

> > exposed using the badblocks interface.

> > 

> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>

> > ---

> >  drivers/nvdimm/pmem.c | 124

> > ++++++++++++++++++++++++++++++++++++++++++++++++++

> >  1 file changed, 124 insertions(+)

> 

> I think we should move this new functionality to the core because

> there is not much pmem driver specific.  It's all generic nvdimm-core

> and block-core functionality.  The only missing information the core

> routine needs is the gendisk and a data offset (if sector-zero is at

> an offset from the base address range of the namespace).  Something

> like:

> 

> nvdimm_namespace_disk_poison(struct nd_namespace_common *ndns,

>     resource_size_t offset, struct gendisk *disk)


This should be easy to do, however isn't it a bit counter-intuitive to
move this into core? The lower level drivers pmem/blk/btt are all owners
of their respective gendisks, and so doesn't it make more sense for them
to be in control of manipulating their gendisk data. Also, I can see
moving them if this was a common operation, but only pmem will ever need
to do this..

I'm not too strongly opposed to this however - the one thing that did
feel a bit awkward being in pmem was that we ask core for a struct
list_head and then walk it ourselves - pmem doesn't normally know about
the internals of nvdimm_bus, but with this we implicitly make it aware.
Dan Williams Dec. 21, 2015, 7:10 p.m. UTC | #4
On Mon, Dec 21, 2015 at 10:50 AM, Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
> On Sun, 2015-12-20 at 17:20 -0800, Dan Williams wrote:
>> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
>> > From: Vishal Verma <vishal.l.verma@intel.com>
>> >
>> > Enable the gendisk badblocks feature for pmem namespaces.
>> > If the pmem namespace being created has any known poison associated
>> > with
>> > its physical address space, convert the poison ranges to bad sectors
>> > exposed using the badblocks interface.
>> >
>> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>> > ---
>> >  drivers/nvdimm/pmem.c | 124
>> > ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 124 insertions(+)
>>
>> I think we should move this new functionality to the core because
>> there is not much pmem driver specific.  It's all generic nvdimm-core
>> and block-core functionality.  The only missing information the core
>> routine needs is the gendisk and a data offset (if sector-zero is at
>> an offset from the base address range of the namespace).  Something
>> like:
>>
>> nvdimm_namespace_disk_poison(struct nd_namespace_common *ndns,
>>     resource_size_t offset, struct gendisk *disk)
>
> This should be easy to do, however isn't it a bit counter-intuitive to
> move this into core? The lower level drivers pmem/blk/btt are all owners
> of their respective gendisks, and so doesn't it make more sense for them
> to be in control of manipulating their gendisk data. Also, I can see
> moving them if this was a common operation, but only pmem will ever need
> to do this..
>
> I'm not too strongly opposed to this however - the one thing that did
> feel a bit awkward being in pmem was that we ask core for a struct
> list_head and then walk it ourselves - pmem doesn't normally know about
> the internals of nvdimm_bus, but with this we implicitly make it aware.

Yeah, it's borderline, but teaching pmem how to walk a list that the
core built up is what triggered the suggestion.  It's true that the
pmem driver owns its gendisk, but that's why it gets to make the
decision to call the library helper or not.
Dan Williams Dec. 23, 2015, 8:28 p.m. UTC | #5
On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
> From: Vishal Verma <vishal.l.verma@intel.com>
>
> Enable the gendisk badblocks feature for pmem namespaces.
> If the pmem namespace being created has any known poison associated with
> its physical address space, convert the poison ranges to bad sectors
> exposed using the badblocks interface.
>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/nvdimm/pmem.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 124 insertions(+)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 8ee7989..462570f 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
[..]
> @@ -196,6 +311,15 @@ static int pmem_attach_disk(struct device *dev,
>         set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
>         pmem->pmem_disk = disk;
>
> +       ret = disk_alloc_badblocks(disk);
> +       if (ret)
> +               return ret;

I think we should skip allocating a bad block list in the case we find
no poison.  Then we can do a simple "if (disk->bb)" to check if there
are any bad blocks.
Verma, Vishal L Dec. 23, 2015, 8:32 p.m. UTC | #6
On Wed, 2015-12-23 at 12:28 -0800, Dan Williams wrote:
> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:

> > From: Vishal Verma <vishal.l.verma@intel.com>

> > 

> > Enable the gendisk badblocks feature for pmem namespaces.

> > If the pmem namespace being created has any known poison associated

> > with

> > its physical address space, convert the poison ranges to bad sectors

> > exposed using the badblocks interface.

> > 

> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>

> > ---

> >  drivers/nvdimm/pmem.c | 124

> > ++++++++++++++++++++++++++++++++++++++++++++++++++

> >  1 file changed, 124 insertions(+)

> > 

> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c

> > index 8ee7989..462570f 100644

> > --- a/drivers/nvdimm/pmem.c

> > +++ b/drivers/nvdimm/pmem.c

> [..]

> > @@ -196,6 +311,15 @@ static int pmem_attach_disk(struct device *dev,

> >         set_capacity(disk, (pmem->size - pmem->data_offset) / 512);

> >         pmem->pmem_disk = disk;

> > 

> > +       ret = disk_alloc_badblocks(disk);

> > +       if (ret)

> > +               return ret;

> 

> I think we should skip allocating a bad block list in the case we find

> no poison.  Then we can do a simple "if (disk->bb)" to check if there

> are any bad blocks.


Sounds good - I'll move this allocation into the (what will be a core)
routine that traverses the list, and make it such that we allocate
disk->bb iff a poison entry is found for _this_ pmem range.

It will mean a check at every list node, but that shouldn't matter as
this is just init-time overhead.

	-Vishal
Dan Williams Dec. 23, 2015, 8:38 p.m. UTC | #7
On Wed, Dec 23, 2015 at 12:32 PM, Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
> On Wed, 2015-12-23 at 12:28 -0800, Dan Williams wrote:
>> On Sun, Dec 20, 2015 at 1:18 AM,  <vishal@kernel.org> wrote:
>> > From: Vishal Verma <vishal.l.verma@intel.com>
>> >
>> > Enable the gendisk badblocks feature for pmem namespaces.
>> > If the pmem namespace being created has any known poison associated
>> > with
>> > its physical address space, convert the poison ranges to bad sectors
>> > exposed using the badblocks interface.
>> >
>> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
>> > ---
>> >  drivers/nvdimm/pmem.c | 124
>> > ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 124 insertions(+)
>> >
>> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
>> > index 8ee7989..462570f 100644
>> > --- a/drivers/nvdimm/pmem.c
>> > +++ b/drivers/nvdimm/pmem.c
>> [..]
>> > @@ -196,6 +311,15 @@ static int pmem_attach_disk(struct device *dev,
>> >         set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
>> >         pmem->pmem_disk = disk;
>> >
>> > +       ret = disk_alloc_badblocks(disk);
>> > +       if (ret)
>> > +               return ret;
>>
>> I think we should skip allocating a bad block list in the case we find
>> no poison.  Then we can do a simple "if (disk->bb)" to check if there
>> are any bad blocks.
>
> Sounds good - I'll move this allocation into the (what will be a core)
> routine that traverses the list, and make it such that we allocate
> disk->bb iff a poison entry is found for _this_ pmem range.
>
> It will mean a check at every list node, but that shouldn't matter as
> this is just init-time overhead.
>

...or free it on "no poison found".
diff mbox

Patch

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee7989..462570f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -19,14 +19,18 @@ 
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
+#include <linux/list.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/libnvdimm.h>
 #include <linux/vmalloc.h>
+#include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "nd-core.h"
 #include "pfn.h"
 #include "nd.h"
 
@@ -163,11 +167,122 @@  static void pmem_detach_disk(struct pmem_device *pmem)
 	blk_cleanup_queue(pmem->pmem_queue);
 }
 
+/**
+ * pmem_add_badblock_range() - Convert a physical address range to bad sectors
+ * @disk:	the disk associated with the pmem namespace
+ * @start:	start of the physical address range
+ * @length:	number of bytes of poison to be added
+ *
+ * This assumes that the range provided with (start, length) is already within
+ * the bounds of physical addresses for this namespace, i.e. lies in the
+ * interval [pmem->phys_addr, pmem->phys_addr + pmem->size)
+ */
+static int pmem_add_badblock_range(struct gendisk *disk, u64 start, u64 length)
+{
+	unsigned int sector_size = queue_logical_block_size(disk->queue);
+	struct pmem_device *pmem = disk->private_data;
+	sector_t start_sector;
+	u64 num_sectors;
+	u32 rem;
+
+	start_sector = div_u64(start - pmem->phys_addr, sector_size);
+	num_sectors = div_u64_rem(length, sector_size, &rem);
+	if (rem)
+		num_sectors++;
+
+	if (unlikely(num_sectors > (u64)INT_MAX)) {
+		u64 remaining = num_sectors;
+		sector_t s = start_sector;
+		int rc;
+
+		while (remaining) {
+			int done = min_t(u64, remaining, INT_MAX);
+
+			rc = disk_set_badblocks(disk, s, done);
+			if (rc)
+				return rc;
+			remaining -= done;
+			s += done;
+		}
+		return 0;
+	} else
+		return disk_set_badblocks(disk, start_sector, num_sectors);
+}
+
+/*
+ * The poison list generated during NFIT initialization may contain multiple,
+ * possibly overlapping ranges in the SPA (System Physical Address) space.
+ * Compare each of these ranges to the pmem namespace currently being
+ * initialized, and add badblocks for the sub-ranges that match
+ */
+static int pmem_add_poison(struct gendisk *disk, struct nvdimm_bus *nvdimm_bus)
+{
+	struct pmem_device *pmem = disk->private_data;
+	struct list_head *poison_list;
+	struct nd_poison *pl;
+	int rc;
+
+	poison_list = nvdimm_bus_get_poison_list(nvdimm_bus);
+	if (list_empty(poison_list))
+		return 0;
+
+	list_for_each_entry(pl, poison_list, list) {
+		u64 pl_end = pl->start + pl->length - 1;
+		u64 pmem_end = pmem->phys_addr + pmem->size - 1;
+
+		/* Discard intervals with no intersection */
+		if (pl_end < pmem->phys_addr)
+			continue;
+		if (pl->start > pmem_end)
+			continue;
+		/* Deal with any overlap after start of the pmem range */
+		if (pl->start >= pmem->phys_addr) {
+			u64 start = pl->start;
+			u64 len;
+
+			if (pl_end <= pmem_end)
+				len = pl->length;
+			else
+				len = pmem->phys_addr + pmem->size - pl->start;
+
+			rc = pmem_add_badblock_range(disk, start, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				start, len);
+			continue;
+		}
+		/* Deal with overlap for poison starting before pmem range */
+		if (pl->start < pmem->phys_addr) {
+			u64 start = pmem->phys_addr;
+			u64 len;
+
+			if (pl_end < pmem_end)
+				len = pl->start + pl->length - pmem->phys_addr;
+			else
+				len = pmem->size;
+
+			rc = pmem_add_badblock_range(disk, start, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				start, len);
+		}
+	}
+
+	return 0;
+}
+
 static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns, struct pmem_device *pmem)
 {
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nvdimm_bus *nvdimm_bus;
 	int nid = dev_to_node(dev);
 	struct gendisk *disk;
+	int ret;
 
 	pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
 	if (!pmem->pmem_queue)
@@ -196,6 +311,15 @@  static int pmem_attach_disk(struct device *dev,
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
 
+	ret = disk_alloc_badblocks(disk);
+	if (ret)
+		return ret;
+
+	nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
+	ret = pmem_add_poison(disk, nvdimm_bus);
+	if (ret)
+		return ret;
+
 	add_disk(disk);
 	revalidate_disk(disk);