Message ID | 20190429172649.8288-9-rgoldwyn@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs dax support | expand |
On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > Change dax_iomap_pfn to return the address as well in order to > use it for performing a memcpy in case the type is IOMAP_DAX_COW. > We don't handle PMD because btrfs does not support hugepages. > > Question: > The sequence of bdev_dax_pgoff() and dax_direct_access() is > used multiple times to calculate address and pfn's. Would it make > sense to call it while calculating address as well to reduce code? > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > --- > fs/dax.c | 19 +++++++++++++++---- > 1 file changed, 15 insertions(+), 4 deletions(-) > > diff --git a/fs/dax.c b/fs/dax.c > index 610bfa861a28..718b1632a39d 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) > } > > static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > - pfn_t *pfnp) > + pfn_t *pfnp, void **addr) > { > const sector_t sector = dax_iomap_sector(iomap, pos); > pgoff_t pgoff; > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > return rc; > id = dax_read_lock(); > length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), > - NULL, pfnp); > + addr, pfnp); > if (length < 0) { > rc = length; > goto out; > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > struct inode *inode = mapping->host; > unsigned long vaddr = vmf->address; > + void *addr; > loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; > struct iomap iomap = { 0 }; Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably because the actor returns vm_fault_t, not bytes copied. I guess that makes it a tiny bit more complicated to pass in two (struct iomap *) to the iomap_begin function... 
> unsigned flags = IOMAP_FAULT; > @@ -1375,16 +1376,26 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > sync = dax_fault_is_synchronous(flags, vma, &iomap); > > switch (iomap.type) { > + case IOMAP_DAX_COW: > case IOMAP_MAPPED: > if (iomap.flags & IOMAP_F_NEW) { > count_vm_event(PGMAJFAULT); > count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); > major = VM_FAULT_MAJOR; > } > - error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn); > + error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn, &addr); > if (error < 0) > goto error_finish_iomap; > > + if (iomap.type == IOMAP_DAX_COW) { > + if (iomap.inline_data) { > + error = memcpy_mcsafe(addr, iomap.inline_data, > + PAGE_SIZE); > + if (error < 0) > + goto error_finish_iomap; > + } else > + memset(addr, 0, PAGE_SIZE); This memcpy_mcsafe/memset chunk shows up a lot in this series. Maybe it should be a static inline within dax.c? --D > + } > entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, > 0, write && !sync); > > @@ -1597,7 +1608,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, > > switch (iomap.type) { > case IOMAP_MAPPED: > - error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); > + error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn, NULL); > if (error < 0) > goto finish_iomap; > > -- > 2.16.4 >
On 10:46 21/05, Darrick J. Wong wrote: > On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote: > > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > > > Change dax_iomap_pfn to return the address as well in order to > > use it for performing a memcpy in case the type is IOMAP_DAX_COW. > > We don't handle PMD because btrfs does not support hugepages. > > > > Question: > > The sequence of bdev_dax_pgoff() and dax_direct_access() is > > used multiple times to calculate address and pfn's. Would it make > > sense to call it while calculating address as well to reduce code? > > > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > > --- > > fs/dax.c | 19 +++++++++++++++---- > > 1 file changed, 15 insertions(+), 4 deletions(-) > > > > diff --git a/fs/dax.c b/fs/dax.c > > index 610bfa861a28..718b1632a39d 100644 > > --- a/fs/dax.c > > +++ b/fs/dax.c > > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) > > } > > > > static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > - pfn_t *pfnp) > > + pfn_t *pfnp, void **addr) > > { > > const sector_t sector = dax_iomap_sector(iomap, pos); > > pgoff_t pgoff; > > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > return rc; > > id = dax_read_lock(); > > length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), > > - NULL, pfnp); > > + addr, pfnp); > > if (length < 0) { > > rc = length; > > goto out; > > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > > struct inode *inode = mapping->host; > > unsigned long vaddr = vmf->address; > > + void *addr; > > loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; > > struct iomap iomap = { 0 }; > > Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably > because the actor returns vm_fault_t, not bytes copied. 
I guess that > makes it a tiny bit more complicated to pass in two (struct iomap *) to > the iomap_begin function... I am not sure I understand this. We do not use iomap_apply() in the fault path: dax_iomap_pte_fault(). We just use iomap_begin() and iomap_end(). So, why can we not implement your idea of using two iomaps? What does open-coding iomap-apply mean?
On Wed, May 22, 2019 at 02:11:39PM -0500, Goldwyn Rodrigues wrote: > On 10:46 21/05, Darrick J. Wong wrote: > > On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote: > > > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > > > > > Change dax_iomap_pfn to return the address as well in order to > > > use it for performing a memcpy in case the type is IOMAP_DAX_COW. > > > We don't handle PMD because btrfs does not support hugepages. > > > > > > Question: > > > The sequence of bdev_dax_pgoff() and dax_direct_access() is > > > used multiple times to calculate address and pfn's. Would it make > > > sense to call it while calculating address as well to reduce code? > > > > > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > > > --- > > > fs/dax.c | 19 +++++++++++++++---- > > > 1 file changed, 15 insertions(+), 4 deletions(-) > > > > > > diff --git a/fs/dax.c b/fs/dax.c > > > index 610bfa861a28..718b1632a39d 100644 > > > --- a/fs/dax.c > > > +++ b/fs/dax.c > > > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) > > > } > > > > > > static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > > - pfn_t *pfnp) > > > + pfn_t *pfnp, void **addr) > > > { > > > const sector_t sector = dax_iomap_sector(iomap, pos); > > > pgoff_t pgoff; > > > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > > return rc; > > > id = dax_read_lock(); > > > length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), > > > - NULL, pfnp); > > > + addr, pfnp); > > > if (length < 0) { > > > rc = length; > > > goto out; > > > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > > > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > > > struct inode *inode = mapping->host; > > > unsigned long vaddr = vmf->address; > > > + void *addr; > > > loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; > > > struct iomap iomap = { 0 }; > > > > Ugh, I had forgotten 
that fs/dax.c open-codes iomap_apply, probably > > because the actor returns vm_fault_t, not bytes copied. I guess that > > makes it a tiny bit more complicated to pass in two (struct iomap *) to > > the iomap_begin function... > > I am not sure I understand this. We do not use iomap_apply() in > the fault path: dax_iomap_pte_fault(). We just use iomap_begin() > and iomap_end(). So, why can we not implement your idea of using two > iomaps? Oh, sorry, I wasn't trying to say that calling ->iomap_begin made it *impossible* to implement. I was merely complaining about the increased maintenance burden that results from open coding -- now there are three places where we have to change a struct iomap declaration, not one (iomap_apply) as I had originally thought. > What does open-coding iomap-apply mean? Any function that (1) calls ->iomap_begin; (2) performs an action on the returned iomap; and (3) then calls ->iomap_end. That's what iomap_apply() does. Really I'm just being maintainer-cranky. Ignore me for now. :) --D > > -- > Goldwyn
On Tue 21-05-19 10:46:25, Darrick J. Wong wrote: > On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote: > > From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > > > Change dax_iomap_pfn to return the address as well in order to > > use it for performing a memcpy in case the type is IOMAP_DAX_COW. > > We don't handle PMD because btrfs does not support hugepages. > > > > Question: > > The sequence of bdev_dax_pgoff() and dax_direct_access() is > > used multiple times to calculate address and pfn's. Would it make > > sense to call it while calculating address as well to reduce code? > > > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > > --- > > fs/dax.c | 19 +++++++++++++++---- > > 1 file changed, 15 insertions(+), 4 deletions(-) > > > > diff --git a/fs/dax.c b/fs/dax.c > > index 610bfa861a28..718b1632a39d 100644 > > --- a/fs/dax.c > > +++ b/fs/dax.c > > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) > > } > > > > static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > - pfn_t *pfnp) > > + pfn_t *pfnp, void **addr) > > { > > const sector_t sector = dax_iomap_sector(iomap, pos); > > pgoff_t pgoff; > > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, > > return rc; > > id = dax_read_lock(); > > length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), > > - NULL, pfnp); > > + addr, pfnp); > > if (length < 0) { > > rc = length; > > goto out; > > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > > struct inode *inode = mapping->host; > > unsigned long vaddr = vmf->address; > > + void *addr; > > loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; > > struct iomap iomap = { 0 }; > > Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably > because the actor returns vm_fault_t, not bytes copied. 
I guess that > makes it a tiny bit more complicated to pass in two (struct iomap *) to > the iomap_begin function... Hum, right. We could actually reimplement dax_iomap_{pte|pmd}_fault() using iomap_apply(). We would just need to propagate error code out of our 'actor' inside the structure pointed to by 'data'. But that's doable. Honza
diff --git a/fs/dax.c b/fs/dax.c index 610bfa861a28..718b1632a39d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) } static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, - pfn_t *pfnp) + pfn_t *pfnp, void **addr) { const sector_t sector = dax_iomap_sector(iomap, pos); pgoff_t pgoff; @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, return rc; id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + addr, pfnp); if (length < 0) { rc = length; goto out; @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, XA_STATE(xas, &mapping->i_pages, vmf->pgoff); struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; + void *addr; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; struct iomap iomap = { 0 }; unsigned flags = IOMAP_FAULT; @@ -1375,16 +1376,26 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, sync = dax_fault_is_synchronous(flags, vma, &iomap); switch (iomap.type) { + case IOMAP_DAX_COW: case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } - error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn); + error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn, &addr); if (error < 0) goto error_finish_iomap; + if (iomap.type == IOMAP_DAX_COW) { + if (iomap.inline_data) { + error = memcpy_mcsafe(addr, iomap.inline_data, + PAGE_SIZE); + if (error < 0) + goto error_finish_iomap; + } else + memset(addr, 0, PAGE_SIZE); + } entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, write && !sync); @@ -1597,7 +1608,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, switch (iomap.type) { case IOMAP_MAPPED: - error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); + error = dax_iomap_pfn(&iomap, pos, 
PMD_SIZE, &pfn, NULL); if (error < 0) goto finish_iomap;