Message ID | 20211106011638.2613039-2-jane.chu@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Dax poison recovery | expand |
On Fri, Nov 05, 2021 at 07:16:37PM -0600, Jane Chu wrote: > Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to > {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. > DAX_OP_NORMAL is the default or the existing mode, and > DAX_OP_RECOVERY is a new mode for data recovery purpose. > > When dax-FS suspects dax media error might be encountered > on a read or write, it can enact the recovery mode read or write > by setting DAX_OP_RECOVERY in the aforementioned APIs. A read > in recovery mode attempts to fetch as much data as possible > until the first poisoned page is encountered. A write in recovery > mode attempts to clear poison(s) in a page-aligned range and > then write the user provided data over. > > DAX_OP_NORMAL should be used for all non-recovery code path. > > Signed-off-by: Jane Chu <jane.chu@oracle.com> > --- > drivers/dax/super.c | 15 +++++++++------ > drivers/md/dm-linear.c | 14 ++++++++------ > drivers/md/dm-log-writes.c | 19 +++++++++++-------- > drivers/md/dm-stripe.c | 14 ++++++++------ > drivers/md/dm-target.c | 2 +- > drivers/md/dm-writecache.c | 8 +++++--- > drivers/md/dm.c | 14 ++++++++------ > drivers/nvdimm/pmem.c | 11 ++++++----- > drivers/nvdimm/pmem.h | 2 +- > drivers/s390/block/dcssblk.c | 13 ++++++++----- > fs/dax.c | 14 ++++++++------ > fs/fuse/dax.c | 4 ++-- > fs/fuse/virtio_fs.c | 12 ++++++++---- > include/linux/dax.h | 18 +++++++++++------- > include/linux/device-mapper.h | 5 +++-- > tools/testing/nvdimm/pmem-dax.c | 2 +- > 16 files changed, 98 insertions(+), 69 deletions(-) > <snip> > diff --git a/include/linux/dax.h b/include/linux/dax.h > index 324363b798ec..931586df2905 100644 > --- a/include/linux/dax.h > +++ b/include/linux/dax.h > @@ -9,6 +9,10 @@ > /* Flag for synchronous flush */ > #define DAXDEV_F_SYNC (1UL << 0) > > +/* dax operation mode dynamically set by caller */ > +#define DAX_OP_NORMAL 0 > +#define DAX_OP_RECOVERY 1 Mostly looks ok to me, but since this is an operation mode, should this be an enum 
instead of an int? Granted I also think six arguments is a lot... though I don't really see any better way to do this. (Dunno, I spent all day running internal patches through the process gauntlet so this is the remaining 2% of my brain speaking...) --D > + > typedef unsigned long dax_entry_t; > > struct dax_device; > @@ -22,8 +26,8 @@ struct dax_operations { > * logical-page-offset into an absolute physical pfn. Return the > * number of pages available for DAX at that pfn. > */ > - long (*direct_access)(struct dax_device *, pgoff_t, long, > - void **, pfn_t *); > + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > + void **, pfn_t *); > /* > * Validate whether this device is usable as an fsdax backing > * device. > @@ -32,10 +36,10 @@ struct dax_operations { > sector_t, sector_t); > /* copy_from_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); > /* copy_to_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); > /* zero_page_range: required operation. 
Zero page range */ > int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); > }; > @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) > bool dax_alive(struct dax_device *dax_dev); > void *dax_get_private(struct dax_device *dax_dev); > long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, > - void **kaddr, pfn_t *pfn); > + int mode, void **kaddr, pfn_t *pfn); > size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, > - size_t bytes, struct iov_iter *i); > + size_t bytes, struct iov_iter *i, int mode); > size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, > - size_t bytes, struct iov_iter *i); > + size_t bytes, struct iov_iter *i, int mode); > int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, > size_t nr_pages); > void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); > diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h > index a7df155ea49b..6596a8e0ceed 100644 > --- a/include/linux/device-mapper.h > +++ b/include/linux/device-mapper.h > @@ -146,9 +146,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); > * >= 0 : the number of bytes accessible at the address > */ > typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, > - long nr_pages, void **kaddr, pfn_t *pfn); > + long nr_pages, int mode, void **kaddr, pfn_t *pfn); > typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, > - void *addr, size_t bytes, struct iov_iter *i); > + void *addr, size_t bytes, struct iov_iter *i, > + int mode); > typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, > size_t nr_pages); > > diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c > index af19c85558e7..71c225630e7e 100644 > --- a/tools/testing/nvdimm/pmem-dax.c > +++ b/tools/testing/nvdimm/pmem-dax.c > @@ -8,7 +8,7 @@ > #include <nd.h> > > long __pmem_direct_access(struct 
pmem_device *pmem, pgoff_t pgoff, > - long nr_pages, void **kaddr, pfn_t *pfn) > + long nr_pages, int mode, void **kaddr, pfn_t *pfn) > { > resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; > > -- > 2.18.4 >
On Fri, Nov 5, 2021 at 6:17 PM Jane Chu <jane.chu@oracle.com> wrote: > > Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to > {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. > DAX_OP_NORMAL is the default or the existing mode, and > DAX_OP_RECOVERY is a new mode for data recovery purpose. > > When dax-FS suspects dax media error might be encountered > on a read or write, it can enact the recovery mode read or write > by setting DAX_OP_RECOVERY in the aforementioned APIs. A read > in recovery mode attempts to fetch as much data as possible > until the first poisoned page is encountered. A write in recovery > mode attempts to clear poison(s) in a page-aligned range and > then write the user provided data over. > > DAX_OP_NORMAL should be used for all non-recovery code path. > > Signed-off-by: Jane Chu <jane.chu@oracle.com> [..] > diff --git a/include/linux/dax.h b/include/linux/dax.h > index 324363b798ec..931586df2905 100644 > --- a/include/linux/dax.h > +++ b/include/linux/dax.h > @@ -9,6 +9,10 @@ > /* Flag for synchronous flush */ > #define DAXDEV_F_SYNC (1UL << 0) > > +/* dax operation mode dynamically set by caller */ > +#define DAX_OP_NORMAL 0 Perhaps this should be called DAX_OP_FAILFAST? > +#define DAX_OP_RECOVERY 1 > + > typedef unsigned long dax_entry_t; > > struct dax_device; > @@ -22,8 +26,8 @@ struct dax_operations { > * logical-page-offset into an absolute physical pfn. Return the > * number of pages available for DAX at that pfn. > */ > - long (*direct_access)(struct dax_device *, pgoff_t, long, > - void **, pfn_t *); > + long (*direct_access)(struct dax_device *, pgoff_t, long, int, Would be nice if that 'int' was an enum, but I'm not sure a new parameter is needed at all, see below... > + void **, pfn_t *); > /* > * Validate whether this device is usable as an fsdax backing > * device. 
> @@ -32,10 +36,10 @@ struct dax_operations { > sector_t, sector_t); > /* copy_from_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); I'm not sure the flag is needed here as the "void *" could carry a flag in the pointer to indicate that is a recovery kaddr. > /* copy_to_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); Same comment here. > /* zero_page_range: required operation. Zero page range */ > int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); > }; > @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) > bool dax_alive(struct dax_device *dax_dev); > void *dax_get_private(struct dax_device *dax_dev); > long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, > - void **kaddr, pfn_t *pfn); > + int mode, void **kaddr, pfn_t *pfn); How about dax_direct_access() calling convention stays the same, but the kaddr is optionally updated to carry a flag in the lower unused bits. So: void **kaddr = NULL; /* caller only cares about the pfn */ void *failfast = NULL; void **kaddr = &failfast; /* caller wants -EIO not recovery */ void *recovery = (void *) DAX_OP_RECOVERY; void **kaddr = &recovery; /* caller wants to carefully access page(s) containing poison */
On 11/5/2021 6:50 PM, Darrick J. Wong wrote: > > <snip> > >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 324363b798ec..931586df2905 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -9,6 +9,10 @@ >> /* Flag for synchronous flush */ >> #define DAXDEV_F_SYNC (1UL << 0) >> >> +/* dax operation mode dynamically set by caller */ >> +#define DAX_OP_NORMAL 0 >> +#define DAX_OP_RECOVERY 1 > > Mostly looks ok to me, but since this is an operation mode, should this > be an enum instead of an int? Yeah, I tried enum at first, and then noticed that the new dax enum type needs to be introduced to device-mapper.h by either including dax.h or defining a mirrored enum, and I wondered if that would be overkill, so I ended up settling on #define. > > Granted I also think six arguments is a lot... though I don't really > see any better way to do this. Dan has a suggestion, and that'll reduce the number of args to 5. > > (Dunno, I spent all day running internal patches through the process > gauntlet so this is the remaining 2% of my brain speaking...) Thanks! -jane > > --D >
On 11/6/2021 9:48 AM, Dan Williams wrote: > On Fri, Nov 5, 2021 at 6:17 PM Jane Chu <jane.chu@oracle.com> wrote: >> >> Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to >> {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. >> DAX_OP_NORMAL is the default or the existing mode, and >> DAX_OP_RECOVERY is a new mode for data recovery purpose. >> >> When dax-FS suspects dax media error might be encountered >> on a read or write, it can enact the recovery mode read or write >> by setting DAX_OP_RECOVERY in the aforementioned APIs. A read >> in recovery mode attempts to fetch as much data as possible >> until the first poisoned page is encountered. A write in recovery >> mode attempts to clear poison(s) in a page-aligned range and >> then write the user provided data over. >> >> DAX_OP_NORMAL should be used for all non-recovery code path. >> >> Signed-off-by: Jane Chu <jane.chu@oracle.com> > [..] >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 324363b798ec..931586df2905 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -9,6 +9,10 @@ >> /* Flag for synchronous flush */ >> #define DAXDEV_F_SYNC (1UL << 0) >> >> +/* dax operation mode dynamically set by caller */ >> +#define DAX_OP_NORMAL 0 > > Perhaps this should be called DAX_OP_FAILFAST? Sure. > >> +#define DAX_OP_RECOVERY 1 >> + >> typedef unsigned long dax_entry_t; >> >> struct dax_device; >> @@ -22,8 +26,8 @@ struct dax_operations { >> * logical-page-offset into an absolute physical pfn. Return the >> * number of pages available for DAX at that pfn. >> */ >> - long (*direct_access)(struct dax_device *, pgoff_t, long, >> - void **, pfn_t *); >> + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > > Would be nice if that 'int' was an enum, but I'm not sure a new > parameter is needed at all, see below... Let's do your suggestion below. :) > >> + void **, pfn_t *); >> /* >> * Validate whether this device is usable as an fsdax backing >> * device. 
>> @@ -32,10 +36,10 @@ struct dax_operations { >> sector_t, sector_t); >> /* copy_from_iter: required operation for fs-dax direct-i/o */ >> size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, >> - struct iov_iter *); >> + struct iov_iter *, int); > > I'm not sure the flag is needed here as the "void *" could carry a > flag in the pointer to indicate that is a recovery kaddr. Agreed. > >> /* copy_to_iter: required operation for fs-dax direct-i/o */ >> size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, >> - struct iov_iter *); >> + struct iov_iter *, int); > > Same comment here. > >> /* zero_page_range: required operation. Zero page range */ >> int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); >> }; >> @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) >> bool dax_alive(struct dax_device *dax_dev); >> void *dax_get_private(struct dax_device *dax_dev); >> long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, >> - void **kaddr, pfn_t *pfn); >> + int mode, void **kaddr, pfn_t *pfn); > > How about dax_direct_access() calling convention stays the same, but > the kaddr is optionally updated to carry a flag in the lower unused > bits. So: > > void **kaddr = NULL; /* caller only cares about the pfn */ > > void *failfast = NULL; > void **kaddr = &failfast; /* caller wants -EIO not recovery */ > > void *recovery = (void *) DAX_OP_RECOVERY; > void **kaddr = &recovery; /* caller wants to carefully access page(s) > containing poison */ > Got it. thanks! -jane
On Mon, Nov 08, 2021 at 09:02:29PM +0000, Jane Chu wrote: > On 11/6/2021 9:48 AM, Dan Williams wrote: > > On Fri, Nov 5, 2021 at 6:17 PM Jane Chu <jane.chu@oracle.com> wrote: > >> > >> Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to > >> {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. > >> DAX_OP_NORMAL is the default or the existing mode, and > >> DAX_OP_RECOVERY is a new mode for data recovery purpose. > >> > >> When dax-FS suspects dax media error might be encountered > >> on a read or write, it can enact the recovery mode read or write > >> by setting DAX_OP_RECOVERY in the aforementioned APIs. A read > >> in recovery mode attempts to fetch as much data as possible > >> until the first poisoned page is encountered. A write in recovery > >> mode attempts to clear poison(s) in a page-aligned range and > >> then write the user provided data over. > >> > >> DAX_OP_NORMAL should be used for all non-recovery code path. > >> > >> Signed-off-by: Jane Chu <jane.chu@oracle.com> > > [..] > >> diff --git a/include/linux/dax.h b/include/linux/dax.h > >> index 324363b798ec..931586df2905 100644 > >> --- a/include/linux/dax.h > >> +++ b/include/linux/dax.h > >> @@ -9,6 +9,10 @@ > >> /* Flag for synchronous flush */ > >> #define DAXDEV_F_SYNC (1UL << 0) > >> > >> +/* dax operation mode dynamically set by caller */ > >> +#define DAX_OP_NORMAL 0 > > > > Perhaps this should be called DAX_OP_FAILFAST? > > Sure. > > > > >> +#define DAX_OP_RECOVERY 1 > >> + > >> typedef unsigned long dax_entry_t; > >> > >> struct dax_device; > >> @@ -22,8 +26,8 @@ struct dax_operations { > >> * logical-page-offset into an absolute physical pfn. Return the > >> * number of pages available for DAX at that pfn. 
> >> */ > >> - long (*direct_access)(struct dax_device *, pgoff_t, long, > >> - void **, pfn_t *); > >> + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > > > > Would be nice if that 'int' was an enum, but I'm not sure a new > > parameter is needed at all, see below... > > Let's do your suggestion below. :) > > > > >> + void **, pfn_t *); > >> /* > >> * Validate whether this device is usable as an fsdax backing > >> * device. > >> @@ -32,10 +36,10 @@ struct dax_operations { > >> sector_t, sector_t); > >> /* copy_from_iter: required operation for fs-dax direct-i/o */ > >> size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, > >> - struct iov_iter *); > >> + struct iov_iter *, int); > > > > I'm not sure the flag is needed here as the "void *" could carry a > > flag in the pointer to indicate that is a recovery kaddr. > > Agreed. Not sure if this is implied but I would like some macros or other helper functions to check these flags hidden in the addresses. For me I'm a bit scared about having flags hidden in the address like this because it can lead to some confusion IMO. But if we have some macros or other calls which can make it more obvious what is going on I think that would help. Apologies if this was what you were already going to do... :-D Ira > > > > >> /* copy_to_iter: required operation for fs-dax direct-i/o */ > >> size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, > >> - struct iov_iter *); > >> + struct iov_iter *, int); > > > > Same comment here. > > > >> /* zero_page_range: required operation. 
Zero page range */ > >> int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); > >> }; > >> @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) > >> bool dax_alive(struct dax_device *dax_dev); > >> void *dax_get_private(struct dax_device *dax_dev); > >> long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, > >> - void **kaddr, pfn_t *pfn); > >> + int mode, void **kaddr, pfn_t *pfn); > > > > How about dax_direct_access() calling convention stays the same, but > > the kaddr is optionally updated to carry a flag in the lower unused > > bits. So: > > > > void **kaddr = NULL; /* caller only cares about the pfn */ > > > > void *failfast = NULL; > > void **kaddr = &failfast; /* caller wants -EIO not recovery */ > > > > void *recovery = (void *) DAX_OP_RECOVERY; > > void **kaddr = &recovery; /* caller wants to carefully access page(s) > > containing poison */ > > > > Got it. > > thanks! > -jane >
On Mon, Nov 8, 2021 at 9:26 PM Ira Weiny <ira.weiny@intel.com> wrote: > > On Mon, Nov 08, 2021 at 09:02:29PM +0000, Jane Chu wrote: > > On 11/6/2021 9:48 AM, Dan Williams wrote: > > > On Fri, Nov 5, 2021 at 6:17 PM Jane Chu <jane.chu@oracle.com> wrote: > > >> > > >> Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to > > >> {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. > > >> DAX_OP_NORMAL is the default or the existing mode, and > > >> DAX_OP_RECOVERY is a new mode for data recovery purpose. > > >> > > >> When dax-FS suspects dax media error might be encountered > > >> on a read or write, it can enact the recovery mode read or write > > >> by setting DAX_OP_RECOVERY in the aforementioned APIs. A read > > >> in recovery mode attempts to fetch as much data as possible > > >> until the first poisoned page is encountered. A write in recovery > > >> mode attempts to clear poison(s) in a page-aligned range and > > >> then write the user provided data over. > > >> > > >> DAX_OP_NORMAL should be used for all non-recovery code path. > > >> > > >> Signed-off-by: Jane Chu <jane.chu@oracle.com> > > > [..] > > >> diff --git a/include/linux/dax.h b/include/linux/dax.h > > >> index 324363b798ec..931586df2905 100644 > > >> --- a/include/linux/dax.h > > >> +++ b/include/linux/dax.h > > >> @@ -9,6 +9,10 @@ > > >> /* Flag for synchronous flush */ > > >> #define DAXDEV_F_SYNC (1UL << 0) > > >> > > >> +/* dax operation mode dynamically set by caller */ > > >> +#define DAX_OP_NORMAL 0 > > > > > > Perhaps this should be called DAX_OP_FAILFAST? > > > > Sure. > > > > > > > >> +#define DAX_OP_RECOVERY 1 > > >> + > > >> typedef unsigned long dax_entry_t; > > >> > > >> struct dax_device; > > >> @@ -22,8 +26,8 @@ struct dax_operations { > > >> * logical-page-offset into an absolute physical pfn. Return the > > >> * number of pages available for DAX at that pfn. 
> > >> */ > > >> - long (*direct_access)(struct dax_device *, pgoff_t, long, > > >> - void **, pfn_t *); > > >> + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > > > > > > Would be nice if that 'int' was an enum, but I'm not sure a new > > > parameter is needed at all, see below... > > > > Let's do your suggestion below. :) > > > > > > > >> + void **, pfn_t *); > > >> /* > > >> * Validate whether this device is usable as an fsdax backing > > >> * device. > > >> @@ -32,10 +36,10 @@ struct dax_operations { > > >> sector_t, sector_t); > > >> /* copy_from_iter: required operation for fs-dax direct-i/o */ > > >> size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, > > >> - struct iov_iter *); > > >> + struct iov_iter *, int); > > > > > > I'm not sure the flag is needed here as the "void *" could carry a > > > flag in the pointer to indicate that is a recovery kaddr. > > > > Agreed. > > Not sure if this is implied but I would like some macros or other helper > functions to check these flags hidden in the addresses. > > For me I'm a bit scared about having flags hidden in the address like this > because I can't lead to some confusions IMO. > > But if we have some macros or other calls which can make this more obvious of > what is going on I think that would help. You could go further and mark it as an 'unsigned long __bitwise' type to get the compiler to help with enforcing accessors to strip off the flag bits.
diff --git a/drivers/dax/super.c b/drivers/dax/super.c index c0910687fbcb..90cae9d84b9c 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -110,6 +110,7 @@ enum dax_device_flags { * @dax_dev: a dax_device instance representing the logical memory range * @pgoff: offset in pages from the start of the device to translate * @nr_pages: number of consecutive pages caller can handle relative to @pfn + * @mode: indicate whether dax operation is in normal or recovery mode * @kaddr: output parameter that returns a virtual address mapping of pfn * @pfn: output parameter that returns an absolute pfn translation of @pgoff * @@ -117,7 +118,7 @@ enum dax_device_flags { * pages accessible at the device relative @pgoff. */ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) + int mode, void **kaddr, pfn_t *pfn) { long avail; @@ -131,7 +132,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, return -EINVAL; avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, - kaddr, pfn); + mode, kaddr, pfn); if (!avail) return -ERANGE; return min(avail, nr_pages); @@ -139,22 +140,24 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, EXPORT_SYMBOL_GPL(dax_direct_access); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) + size_t bytes, struct iov_iter *i, int mode) { if (!dax_alive(dax_dev)) return 0; - return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); + return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i, + mode); } EXPORT_SYMBOL_GPL(dax_copy_from_iter); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) + size_t bytes, struct iov_iter *i, int mode) { if (!dax_alive(dax_dev)) return 0; - return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); + return 
dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i, + mode); } EXPORT_SYMBOL_GPL(dax_copy_to_iter); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 90de42f6743a..c73ac6b98801 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -173,27 +173,29 @@ static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) } static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, pfn_t *pfn) { struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn); } static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, + int mode) { struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i, mode); } static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, + int mode) { struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i, mode); } static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index df3cd78223fb..1e9847f904ef 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -959,16 +959,18 @@ static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, } static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, + void **kaddr, 
pfn_t *pfn) { struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn); } static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, - pgoff_t pgoff, void *addr, size_t bytes, - struct iov_iter *i) + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i, + int mode) { struct log_writes_c *lc = ti->private; sector_t sector = pgoff * PAGE_SECTORS; @@ -985,16 +987,17 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, return 0; } dax_copy: - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i, mode); } static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, - pgoff_t pgoff, void *addr, size_t bytes, - struct iov_iter *i) + pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i, + int mode) { struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i, mode); } static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 50dba3f39274..4c098452268b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -317,27 +317,29 @@ static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) } static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, pfn_t *pfn) { struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn); } static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, 
size_t bytes, struct iov_iter *i, + int mode) { struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i, mode); } static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, + int mode) { struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i, mode); } static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 64dd0b34fcf4..2de1073dbad6 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -142,7 +142,7 @@ static void io_err_release_clone_rq(struct request *clone, } static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, pfn_t *pfn) { return -EIO; } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 0af464a863fe..b2e4ff922fe2 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -286,7 +286,8 @@ static int persistent_memory_claim(struct dm_writecache *wc) id = dax_read_lock(); - da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn); + da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_OP_NORMAL, + &wc->memory_map, &pfn); if (da < 0) { wc->memory_map = NULL; r = da; @@ -308,8 +309,9 @@ static int persistent_memory_claim(struct dm_writecache *wc) i = 0; do { long daa; - daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i, - NULL, &pfn); + daa = dax_direct_access(wc->ssd_dev->dax_dev, + offset + i, p - i, + DAX_OP_NORMAL, NULL, &pfn); if (daa <= 0) { r = daa ? 
daa : -EINVAL; goto err3; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 282008afc465..dc354db22ef9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1001,7 +1001,8 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, } static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, + pfn_t *pfn) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1019,7 +1020,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (len < 1) goto out; nr_pages = min(len, nr_pages); - ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); + ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn); out: dm_put_live_table(md, srcu_idx); @@ -1028,7 +1029,8 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, } static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, + struct iov_iter *i, int mode) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1044,7 +1046,7 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, ret = copy_from_iter(addr, bytes, i); goto out; } - ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i); + ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i, mode); out: dm_put_live_table(md, srcu_idx); @@ -1052,7 +1054,7 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, } static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, int mode) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1068,7 +1070,7 @@ static size_t 
dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, ret = copy_to_iter(addr, bytes, i); goto out; } - ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); + ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i, mode); out: dm_put_live_table(md, srcu_idx); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0d6633987552..3dc99e0bf633 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -255,7 +255,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, pfn_t *pfn) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; @@ -294,11 +294,12 @@ static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, } static long pmem_dax_direct_access(struct dax_device *dax_dev, - pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) + pgoff_t pgoff, long nr_pages, int mode, void **kaddr, + pfn_t *pfn) { struct pmem_device *pmem = dax_get_private(dax_dev); - return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); + return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn); } /* @@ -308,13 +309,13 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, * dax_iomap_actor() */ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, int mode) { return _copy_from_iter_flushcache(addr, bytes, i); } static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i, int mode) { return _copy_mc_to_iter(addr, bytes, i); } diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 59cfe13ea8a8..bda6a898ba81 100644 --- 
a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -27,7 +27,7 @@ struct pmem_device { }; long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, int mode, void **kaddr, pfn_t *pfn); #ifdef CONFIG_MEMORY_FAILURE static inline bool test_and_clear_pmem_poison(struct page *page) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index e65e83764d1c..fb9f768e12a1 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -32,7 +32,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_submit_bio(struct bio *bio); static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, int mode, void **kaddr, pfn_t *pfn); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -45,13 +45,15 @@ static const struct block_device_operations dcssblk_devops = { }; static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i, + int mode) { return copy_from_iter(addr, bytes, i); } static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i, + int mode) { return copy_to_iter(addr, bytes, i); } @@ -62,7 +64,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, long rc; void *kaddr; - rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_OP_NORMAL, + &kaddr, NULL); if (rc < 0) return rc; memset(kaddr, 0, nr_pages << PAGE_SHIFT); @@ -941,7 +944,7 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, static long 
dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, pfn_t *pfn) { struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev); diff --git a/fs/dax.c b/fs/dax.c index eb715363fd66..bea6df1498c3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -735,7 +735,8 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, 1, DAX_OP_NORMAL, &kaddr, + NULL); if (rc < 0) { dax_read_unlock(id); return rc; @@ -1036,7 +1037,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, return rc; id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + DAX_OP_NORMAL, NULL, pfnp); if (length < 0) { rc = length; goto out; @@ -1162,7 +1163,8 @@ s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap) if (page_aligned) rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1); else - rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); + rc = dax_direct_access(iomap->dax_dev, pgoff, 1, + DAX_OP_NORMAL, &kaddr, NULL); if (rc < 0) { dax_read_unlock(id); return rc; @@ -1231,7 +1233,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, break; map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), - &kaddr, NULL); + DAX_OP_NORMAL, &kaddr, NULL); if (map_len < 0) { ret = map_len; break; @@ -1250,10 +1252,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, */ if (iov_iter_rw(iter) == WRITE) xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, - map_len, iter); + map_len, iter, DAX_OP_NORMAL); else xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, - map_len, iter); + map_len, iter, DAX_OP_NORMAL); pos += xfer; length -= xfer; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 713818d74de6..755d8d4b7d34 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c 
@@ -1241,8 +1241,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); - nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL, - NULL); + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), + DAX_OP_NORMAL, NULL, NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index b4c7c7fa987f..fb5433a37a7b 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -739,7 +739,8 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, * offset. */ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, int mode, void **kaddr, + pfn_t *pfn) { struct virtio_fs *fs = dax_get_private(dax_dev); phys_addr_t offset = PFN_PHYS(pgoff); @@ -755,14 +756,16 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) + size_t bytes, struct iov_iter *i, + int mode) { return copy_from_iter(addr, bytes, i); } static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i) + size_t bytes, struct iov_iter *i, + int mode) { return copy_to_iter(addr, bytes, i); } @@ -773,7 +776,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, long rc; void *kaddr; - rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_OP_NORMAL, + &kaddr, NULL); if (rc < 0) return rc; memset(kaddr, 0, nr_pages << PAGE_SHIFT); diff --git a/include/linux/dax.h b/include/linux/dax.h index 324363b798ec..931586df2905 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -9,6 +9,10 @@ /* Flag for synchronous 
flush */ #define DAXDEV_F_SYNC (1UL << 0) +/* dax operation mode dynamically set by caller */ +#define DAX_OP_NORMAL 0 +#define DAX_OP_RECOVERY 1 + typedef unsigned long dax_entry_t; struct dax_device; @@ -22,8 +26,8 @@ struct dax_operations { * logical-page-offset into an absolute physical pfn. Return the * number of pages available for DAX at that pfn. */ - long (*direct_access)(struct dax_device *, pgoff_t, long, - void **, pfn_t *); + long (*direct_access)(struct dax_device *, pgoff_t, long, int, + void **, pfn_t *); /* * Validate whether this device is usable as an fsdax backing * device. @@ -32,10 +36,10 @@ struct dax_operations { sector_t, sector_t); /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); + struct iov_iter *, int); /* copy_to_iter: required operation for fs-dax direct-i/o */ size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); + struct iov_iter *, int); /* zero_page_range: required operation. 
Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn); + int mode, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i); + size_t bytes, struct iov_iter *i, int mode); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t bytes, struct iov_iter *i); + size_t bytes, struct iov_iter *i, int mode); int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7df155ea49b..6596a8e0ceed 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -146,9 +146,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); * >= 0 : the number of bytes accessible at the address */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, int mode, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i); + void *addr, size_t bytes, struct iov_iter *i, + int mode); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index af19c85558e7..71c225630e7e 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -8,7 +8,7 @@ #include <nd.h> long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long 
nr_pages, int mode, void **kaddr, pfn_t *pfn) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. DAX_OP_NORMAL is the default, existing mode, and DAX_OP_RECOVERY is a new mode for data recovery purposes. When dax-FS suspects that a dax media error might be encountered on a read or write, it can request a recovery-mode read or write by setting DAX_OP_RECOVERY in the aforementioned APIs. A read in recovery mode attempts to fetch as much data as possible until the first poisoned page is encountered. A write in recovery mode attempts to clear poison(s) in a page-aligned range and then write the user-provided data over it. DAX_OP_NORMAL should be used for all non-recovery code paths. Signed-off-by: Jane Chu <jane.chu@oracle.com> --- drivers/dax/super.c | 15 +++++++++------ drivers/md/dm-linear.c | 14 ++++++++------ drivers/md/dm-log-writes.c | 19 +++++++++++-------- drivers/md/dm-stripe.c | 14 ++++++++------ drivers/md/dm-target.c | 2 +- drivers/md/dm-writecache.c | 8 +++++--- drivers/md/dm.c | 14 ++++++++------ drivers/nvdimm/pmem.c | 11 ++++++----- drivers/nvdimm/pmem.h | 2 +- drivers/s390/block/dcssblk.c | 13 ++++++++----- fs/dax.c | 14 ++++++++------ fs/fuse/dax.c | 4 ++-- fs/fuse/virtio_fs.c | 12 ++++++++---- include/linux/dax.h | 18 +++++++++++------- include/linux/device-mapper.h | 5 +++-- tools/testing/nvdimm/pmem-dax.c | 2 +- 16 files changed, 98 insertions(+), 69 deletions(-)