diff mbox

[10/20] mm: Move handling of COW faults into DAX code

Message ID 1474992504-20133-11-git-send-email-jack@suse.cz (mailing list archive)
State Not Applicable
Headers show

Commit Message

Jan Kara Sept. 27, 2016, 4:08 p.m. UTC
Move final handling of COW faults from generic code into the DAX fault
handler. That way, generic code doesn't have to be aware of the
peculiarities of DAX locking, so remove that knowledge from it.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/dax.c            | 22 ++++++++++++++++------
 include/linux/dax.h |  7 -------
 include/linux/mm.h  |  9 +--------
 mm/memory.c         | 14 ++++----------
 4 files changed, 21 insertions(+), 31 deletions(-)

Comments

Ross Zwisler Oct. 17, 2016, 7:29 p.m. UTC | #1
On Tue, Sep 27, 2016 at 06:08:14PM +0200, Jan Kara wrote:
> Move final handling of COW faults from generic code into DAX fault
> handler. That way generic code doesn't have to be aware of peculiarities
> of DAX locking so remove that knowledge.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/dax.c            | 22 ++++++++++++++++------
>  include/linux/dax.h |  7 -------
>  include/linux/mm.h  |  9 +--------
>  mm/memory.c         | 14 ++++----------
>  4 files changed, 21 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 0dc251ca77b8..b1c503930d1d 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -876,10 +876,15 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  			goto unlock_entry;
>  		if (!radix_tree_exceptional_entry(entry)) {
>  			vmf->page = entry;
> -			return VM_FAULT_LOCKED;
> +			if (unlikely(PageHWPoison(entry))) {
> +				put_locked_mapping_entry(mapping, vmf->pgoff,
> +							 entry);
> +				return VM_FAULT_HWPOISON;
> +			}
>  		}
> -		vmf->entry = entry;
> -		return VM_FAULT_DAX_LOCKED;
> +		error = finish_fault(vmf);
> +		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> +		return error ? error : VM_FAULT_DONE_COW;
>  	}
>  
>  	if (!buffer_mapped(&bh)) {
> @@ -1430,10 +1435,15 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>  			goto unlock_entry;
>  		if (!radix_tree_exceptional_entry(entry)) {
>  			vmf->page = entry;

In __do_fault() we explicitly clear vmf->page in the case where PageHWPoison()
is set.  I think we can get the same behavior here by moving the assignment
that sets vmf->page so that it comes after the PageHWPoison() check.

> -			return VM_FAULT_LOCKED;
> +			if (unlikely(PageHWPoison(entry))) {
> +				put_locked_mapping_entry(mapping, vmf->pgoff,
> +							 entry);
> +				return VM_FAULT_HWPOISON;
> +			}
>  		}
> -		vmf->entry = entry;
> -		return VM_FAULT_DAX_LOCKED;

I think we're missing a call to 

	__SetPageUptodate(new_page);

before finish_fault()?  This call currently lives in do_cow_fault(), and
is part of the path that we don't skip as part of the VM_FAULT_DAX_LOCKED
logic.

Both of these comments apply equally to the iomap_dax_fault() code.
Jan Kara Oct. 18, 2016, 10:32 a.m. UTC | #2
On Mon 17-10-16 13:29:49, Ross Zwisler wrote:
> On Tue, Sep 27, 2016 at 06:08:14PM +0200, Jan Kara wrote:
> > Move final handling of COW faults from generic code into DAX fault
> > handler. That way generic code doesn't have to be aware of peculiarities
> > of DAX locking so remove that knowledge.
> > 
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/dax.c            | 22 ++++++++++++++++------
> >  include/linux/dax.h |  7 -------
> >  include/linux/mm.h  |  9 +--------
> >  mm/memory.c         | 14 ++++----------
> >  4 files changed, 21 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 0dc251ca77b8..b1c503930d1d 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -876,10 +876,15 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> >  			goto unlock_entry;
> >  		if (!radix_tree_exceptional_entry(entry)) {
> >  			vmf->page = entry;
> > -			return VM_FAULT_LOCKED;
> > +			if (unlikely(PageHWPoison(entry))) {
> > +				put_locked_mapping_entry(mapping, vmf->pgoff,
> > +							 entry);
> > +				return VM_FAULT_HWPOISON;
> > +			}
> >  		}
> > -		vmf->entry = entry;
> > -		return VM_FAULT_DAX_LOCKED;
> > +		error = finish_fault(vmf);
> > +		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> > +		return error ? error : VM_FAULT_DONE_COW;
> >  	}
> >  
> >  	if (!buffer_mapped(&bh)) {
> > @@ -1430,10 +1435,15 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> >  			goto unlock_entry;
> >  		if (!radix_tree_exceptional_entry(entry)) {
> >  			vmf->page = entry;
> 
> In __do_fault() we explicitly clear vmf->page in the case where PageHWPoison()
> is set.  I think we can get the same behavior here by moving the call that
> sets vmf->page after the PageHWPoison() check.

Actually, the whole HWPoison check was nonsensical for DAX. We want to
check for HWPoison to avoid reading from poisoned pages. However, for DAX we
either use copy_user_dax(), which takes care of IO errors / poisoning itself,
or we use clear_user_highpage(), which doesn't touch the source page. So we
don't have to check for HWPoison at all. Fixed.

> > -			return VM_FAULT_LOCKED;
> > +			if (unlikely(PageHWPoison(entry))) {
> > +				put_locked_mapping_entry(mapping, vmf->pgoff,
> > +							 entry);
> > +				return VM_FAULT_HWPOISON;
> > +			}
> >  		}
> > -		vmf->entry = entry;
> > -		return VM_FAULT_DAX_LOCKED;
> 
> I think we're missing a call to 
> 
> 	__SetPageUptodate(new_page);

> before finish_fault()?  This call currently lives in do_cow_fault(), and
> is part of the path that we don't skip as part of the VM_FAULT_DAX_LOCKED
> logic.

Ah, great catch. I wonder how the DAX COW test could have passed with this?
Maybe PageUptodate is not used much for anon pages... Anyway, thanks for
spotting this.

								Honza
diff mbox

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 0dc251ca77b8..b1c503930d1d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -876,10 +876,15 @@  int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			goto unlock_entry;
 		if (!radix_tree_exceptional_entry(entry)) {
 			vmf->page = entry;
-			return VM_FAULT_LOCKED;
+			if (unlikely(PageHWPoison(entry))) {
+				put_locked_mapping_entry(mapping, vmf->pgoff,
+							 entry);
+				return VM_FAULT_HWPOISON;
+			}
 		}
-		vmf->entry = entry;
-		return VM_FAULT_DAX_LOCKED;
+		error = finish_fault(vmf);
+		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+		return error ? error : VM_FAULT_DONE_COW;
 	}
 
 	if (!buffer_mapped(&bh)) {
@@ -1430,10 +1435,15 @@  int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			goto unlock_entry;
 		if (!radix_tree_exceptional_entry(entry)) {
 			vmf->page = entry;
-			return VM_FAULT_LOCKED;
+			if (unlikely(PageHWPoison(entry))) {
+				put_locked_mapping_entry(mapping, vmf->pgoff,
+							 entry);
+				return VM_FAULT_HWPOISON;
+			}
 		}
-		vmf->entry = entry;
-		return VM_FAULT_DAX_LOCKED;
+		error = finish_fault(vmf);
+		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+		return error ? error : VM_FAULT_DONE_COW;
 	}
 
 	switch (iomap.type) {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index add6c4bc568f..b1a1acd10df2 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,7 +26,6 @@  void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
-void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
 int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
@@ -35,12 +34,6 @@  static inline struct page *read_dax_sector(struct block_device *bdev,
 {
 	return ERR_PTR(-ENXIO);
 }
-/* Shouldn't ever be called when dax is disabled. */
-static inline void dax_unlock_mapping_entry(struct address_space *mapping,
-					    pgoff_t index)
-{
-	BUG();
-}
 static inline int __dax_zero_page_range(struct block_device *bdev,
 		sector_t sector, unsigned int offset, unsigned int length)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 919ebdd27f1e..1055f2ece80d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -310,12 +310,6 @@  struct vm_fault {
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
-	void *entry;			/* ->fault handler can alternatively
-					 * return locked DAX entry. In that
-					 * case handler should return
-					 * VM_FAULT_DAX_LOCKED and fill in
-					 * entry here.
-					 */
 	/* These three entries are valid only while holding ptl lock */
 	pte_t *pte;			/* Pointer to pte entry matching
 					 * the 'address'. NULL if the page
@@ -1118,8 +1112,7 @@  static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
-#define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */
-#define VM_FAULT_DONE_COW   0x2000	/* ->fault has fully handled COW */
+#define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
diff --git a/mm/memory.c b/mm/memory.c
index f54cfad7fe04..a4522e8999b2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2845,7 +2845,7 @@  static int __do_fault(struct vm_fault *vmf)
 
 	ret = vma->vm_ops->fault(vma, vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
-			    VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW)))
+			    VM_FAULT_DONE_COW)))
 		return ret;
 
 	if (unlikely(PageHWPoison(vmf->page))) {
@@ -3239,17 +3239,11 @@  static int do_cow_fault(struct vm_fault *vmf)
 	if (ret & VM_FAULT_DONE_COW)
 		return ret;
 
-	if (!(ret & VM_FAULT_DAX_LOCKED))
-		copy_user_highpage(new_page, vmf->page, vmf->address, vma);
+	copy_user_highpage(new_page, vmf->page, vmf->address, vma);
 	__SetPageUptodate(new_page);
-
 	ret |= finish_fault(vmf);
-	if (!(ret & VM_FAULT_DAX_LOCKED)) {
-		unlock_page(vmf->page);
-		put_page(vmf->page);
-	} else {
-		dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff);
-	}
+	unlock_page(vmf->page);
+	put_page(vmf->page);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 	return ret;