Message ID | 20211021001059.438843-6-jane.chu@oracle.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Delegated to: | Mike Snitzer |
Headers | show |
Series | dax poison recovery with RWF_RECOVERY_DATA flag | expand |
> + if (flags & DAXDEV_F_RECOVERY) { > + lead_off = (unsigned long)addr & ~PAGE_MASK; > + len = PFN_PHYS(PFN_UP(lead_off + bytes)); > + if (is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, len)) { > + if (lead_off || !(PAGE_ALIGNED(bytes))) { > + dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", > + addr, bytes); > + return (size_t) -EIO; > + } > + pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; > + if (pmem_clear_poison(pmem, pmem_off, bytes) != > + BLK_STS_OK) > + return (size_t) -EIO; > + } Shouldn't this just go down in a separate ->clear_poison operation to make the whole thing a little easier to follow? -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
On 10/21/2021 4:28 AM, Christoph Hellwig wrote: >> + if (flags & DAXDEV_F_RECOVERY) { >> + lead_off = (unsigned long)addr & ~PAGE_MASK; >> + len = PFN_PHYS(PFN_UP(lead_off + bytes)); >> + if (is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, len)) { >> + if (lead_off || !(PAGE_ALIGNED(bytes))) { >> + dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", >> + addr, bytes); >> + return (size_t) -EIO; >> + } >> + pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; >> + if (pmem_clear_poison(pmem, pmem_off, bytes) != >> + BLK_STS_OK) >> + return (size_t) -EIO; >> + } > > Shouldn't this just go down in a separe ->clear_poison operation > to make the whole thing a little easier to follow? > Do you mean to lift or refactor the above to a helper function so as to improve the readability of the code? I can do that, just to confirm. On the same note, would you prefer to refactor the read path as well? thanks! -jane -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
Hi Jane, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on device-mapper-dm/for-next] [also build test WARNING on nvdimm/libnvdimm-for-next mszeredi-fuse/for-next linus/master v5.15-rc6 next-20211021] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Jane-Chu/dax-poison-recovery-with-RWF_RECOVERY_DATA-flag/20211021-081336 base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next config: i386-debian-10.3 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/a01994a484c54b2f4b6eb32104ab3caf7b9b32a8 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jane-Chu/dax-poison-recovery-with-RWF_RECOVERY_DATA-flag/20211021-081336 git checkout a01994a484c54b2f4b6eb32104ab3caf7b9b32a8 # save the attached .config to linux build tree make W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from include/linux/device.h:15, from include/linux/blk_types.h:11, from include/linux/genhd.h:19, from include/linux/blkdev.h:8, from drivers/nvdimm/pmem.c:10: drivers/nvdimm/pmem.c: In function 'pmem_copy_from_iter': >> drivers/nvdimm/pmem.c:336:19: warning: format '%lx' expects argument of type 'long unsigned int', but argument 4 has type 'size_t' {aka 'unsigned int'} [-Wformat=] 336 | dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/dev_printk.h:110:16: note: in definition of macro 'dev_printk_index_wrap' 110 | _p_func(dev, fmt, ##__VA_ARGS__); \ | ^~~ include/linux/dev_printk.h:146:54: 
note: in expansion of macro 'dev_fmt' 146 | dev_printk_index_wrap(_dev_warn, KERN_WARNING, dev, dev_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~ drivers/nvdimm/pmem.c:336:5: note: in expansion of macro 'dev_warn' 336 | dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", | ^~~~~~~~ drivers/nvdimm/pmem.c:336:63: note: format string is defined here 336 | dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", | ~~~^ | | | long unsigned int | %#x vim +336 drivers/nvdimm/pmem.c 306 307 /* 308 * Even though the 'no check' versions of copy_from_iter_flushcache() 309 * and copy_mc_to_iter() are used to bypass HARDENED_USERCOPY overhead, 310 * 'read'/'write' aren't always safe when poison is consumed. They happen 311 * to be safe because the 'read'/'write' range has been guaranteed 312 * be free of poison(s) by a prior call to dax_direct_access() on the 313 * caller stack. 314 * However with the introduction of DAXDEV_F_RECOVERY, the 'read'/'write' 315 * range may contain poison(s), so the functions perform explicit check 316 * on poison, and 'read' end up fetching only non-poisoned page(s) up 317 * till the first poison is encountered while 'write' require the range 318 * is page aligned in order to restore the poisoned page's memory type 319 * back to "rw" after clearing the poison(s). 320 * In the event of poison related failure, (size_t) -EIO is returned and 321 * caller may check the return value after casting it to (ssize_t). 
322 */ 323 static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 324 void *addr, size_t bytes, struct iov_iter *i, unsigned long flags) 325 { 326 phys_addr_t pmem_off; 327 size_t len, lead_off; 328 struct pmem_device *pmem = dax_get_private(dax_dev); 329 struct device *dev = pmem->bb.dev; 330 331 if (flags & DAXDEV_F_RECOVERY) { 332 lead_off = (unsigned long)addr & ~PAGE_MASK; 333 len = PFN_PHYS(PFN_UP(lead_off + bytes)); 334 if (is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, len)) { 335 if (lead_off || !(PAGE_ALIGNED(bytes))) { > 336 dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", 337 addr, bytes); 338 return (size_t) -EIO; 339 } 340 pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; 341 if (pmem_clear_poison(pmem, pmem_off, bytes) != 342 BLK_STS_OK) 343 return (size_t) -EIO; 344 } 345 } 346 347 return _copy_from_iter_flushcache(addr, bytes, i); 348 } 349 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
Hi Jane, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on device-mapper-dm/for-next] [also build test WARNING on mszeredi-fuse/for-next linus/master v5.15-rc7 next-20211025] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Jane-Chu/dax-poison-recovery-with-RWF_RECOVERY_DATA-flag/20211021-081336 base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next config: i386-randconfig-a013-20211022 (attached as .config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 5dc339d9825f1dbe788cfb69c88210a59bbf8e3a) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/a01994a484c54b2f4b6eb32104ab3caf7b9b32a8 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jane-Chu/dax-poison-recovery-with-RWF_RECOVERY_DATA-flag/20211021-081336 git checkout a01994a484c54b2f4b6eb32104ab3caf7b9b32a8 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> drivers/nvdimm/pmem.c:337:12: warning: format specifies type 'unsigned long' but the argument has type 'size_t' (aka 'unsigned int') [-Wformat] addr, bytes); ^~~~~ include/linux/dev_printk.h:146:70: note: expanded from macro 'dev_warn' dev_printk_index_wrap(_dev_warn, KERN_WARNING, dev, dev_fmt(fmt), ##__VA_ARGS__) ~~~ ^~~~~~~~~~~ include/linux/dev_printk.h:110:23: note: expanded from macro 'dev_printk_index_wrap' _p_func(dev, fmt, ##__VA_ARGS__); \ ~~~ ^~~~~~~~~~~ 1 warning generated. 
vim +337 drivers/nvdimm/pmem.c 306 307 /* 308 * Even though the 'no check' versions of copy_from_iter_flushcache() 309 * and copy_mc_to_iter() are used to bypass HARDENED_USERCOPY overhead, 310 * 'read'/'write' aren't always safe when poison is consumed. They happen 311 * to be safe because the 'read'/'write' range has been guaranteed 312 * be free of poison(s) by a prior call to dax_direct_access() on the 313 * caller stack. 314 * However with the introduction of DAXDEV_F_RECOVERY, the 'read'/'write' 315 * range may contain poison(s), so the functions perform explicit check 316 * on poison, and 'read' end up fetching only non-poisoned page(s) up 317 * till the first poison is encountered while 'write' require the range 318 * is page aligned in order to restore the poisoned page's memory type 319 * back to "rw" after clearing the poison(s). 320 * In the event of poison related failure, (size_t) -EIO is returned and 321 * caller may check the return value after casting it to (ssize_t). 322 */ 323 static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 324 void *addr, size_t bytes, struct iov_iter *i, unsigned long flags) 325 { 326 phys_addr_t pmem_off; 327 size_t len, lead_off; 328 struct pmem_device *pmem = dax_get_private(dax_dev); 329 struct device *dev = pmem->bb.dev; 330 331 if (flags & DAXDEV_F_RECOVERY) { 332 lead_off = (unsigned long)addr & ~PAGE_MASK; 333 len = PFN_PHYS(PFN_UP(lead_off + bytes)); 334 if (is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, len)) { 335 if (lead_off || !(PAGE_ALIGNED(bytes))) { 336 dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", > 337 addr, bytes); 338 return (size_t) -EIO; 339 } 340 pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; 341 if (pmem_clear_poison(pmem, pmem_off, bytes) != 342 BLK_STS_OK) 343 return (size_t) -EIO; 344 } 345 } 346 347 return _copy_from_iter_flushcache(addr, bytes, i); 348 } 349 --- 0-DAY CI Kernel Test Service, Intel Corporation 
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e2a1c35108cd..c456f84d2f6f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -305,21 +305,83 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, } /* - * Use the 'no check' versions of copy_from_iter_flushcache() and - * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds - * checking, both file offset and device offset, is handled by - * dax_iomap_actor() + * Even though the 'no check' versions of copy_from_iter_flushcache() + * and copy_mc_to_iter() are used to bypass HARDENED_USERCOPY overhead, + * 'read'/'write' aren't always safe when poison is consumed. They happen + * to be safe because the 'read'/'write' range has been guaranteed + * be free of poison(s) by a prior call to dax_direct_access() on the + * caller stack. + * However with the introduction of DAXDEV_F_RECOVERY, the 'read'/'write' + * range may contain poison(s), so the functions perform explicit check + * on poison, and 'read' end up fetching only non-poisoned page(s) up + * till the first poison is encountered while 'write' require the range + * is page aligned in order to restore the poisoned page's memory type + * back to "rw" after clearing the poison(s). + * In the event of poison related failure, (size_t) -EIO is returned and + * caller may check the return value after casting it to (ssize_t). 
*/ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i, unsigned long flags) { + phys_addr_t pmem_off; + size_t len, lead_off; + struct pmem_device *pmem = dax_get_private(dax_dev); + struct device *dev = pmem->bb.dev; + + if (flags & DAXDEV_F_RECOVERY) { + lead_off = (unsigned long)addr & ~PAGE_MASK; + len = PFN_PHYS(PFN_UP(lead_off + bytes)); + if (is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, len)) { + if (lead_off || !(PAGE_ALIGNED(bytes))) { + dev_warn(dev, "Found poison, but addr(%p) and/or bytes(%#lx) not page aligned\n", + addr, bytes); + return (size_t) -EIO; + } + pmem_off = PFN_PHYS(pgoff) + pmem->data_offset; + if (pmem_clear_poison(pmem, pmem_off, bytes) != + BLK_STS_OK) + return (size_t) -EIO; + } + } + return _copy_from_iter_flushcache(addr, bytes, i); } static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i, unsigned long flags) { - return _copy_mc_to_iter(addr, bytes, i); + int num_bad; + size_t len, lead_off; + unsigned long bad_pfn; + bool bad_pmem = false; + size_t adj_len = bytes; + sector_t sector, first_bad; + struct pmem_device *pmem = dax_get_private(dax_dev); + struct device *dev = pmem->bb.dev; + + if (flags & DAXDEV_F_RECOVERY) { + sector = PFN_PHYS(pgoff) / 512; + lead_off = (unsigned long)addr & ~PAGE_MASK; + len = PFN_PHYS(PFN_UP(lead_off + bytes)); + if (pmem->bb.count) + bad_pmem = !!badblocks_check(&pmem->bb, sector, + len / 512, &first_bad, &num_bad); + if (bad_pmem) { + bad_pfn = PHYS_PFN(first_bad * 512); + if (bad_pfn == pgoff) { + dev_warn(dev, "Found poison in page: pgoff(%#lx)\n", + pgoff); + return -EIO; + } + adj_len = PFN_PHYS(bad_pfn - pgoff) - lead_off; + dev_WARN_ONCE(dev, (adj_len > bytes), + "out-of-range first_bad?"); + } + if (adj_len == 0) + return (size_t) -EIO; + } + + return _copy_mc_to_iter(addr, adj_len, i); } static const struct dax_operations pmem_dax_ops = { diff --git 
a/fs/dax.c b/fs/dax.c index 69433c6cd6c4..b9286668dc46 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1246,6 +1246,11 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, map_len, iter, dax_flag); + if ((ssize_t)xfer == -EIO) { + ret = -EIO; + break; + } + pos += xfer; length -= xfer; done += xfer;
When DAXDEV_F_RECOVERY flag is set, pmem_copy_to_iter() shall read as much data as possible up till the first poisoned page is encountered, and pmem_copy_from_iter() shall try to clear poison(s) within the page aligned range prior to writing. Signed-off-by: Jane Chu <jane.chu@oracle.com> --- drivers/nvdimm/pmem.c | 72 ++++++++++++++++++++++++++++++++++++++++--- fs/dax.c | 5 +++ 2 files changed, 72 insertions(+), 5 deletions(-)