Message ID | 20181205122835.19290-7-rgoldwyn@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: Support for DAX devices | expand |
On 05/12/2018 13:28, Goldwyn Rodrigues wrote:
[...]
> +static int copy_extent_page(struct extent_map *em, void *daddr, u64 pos)
> +{
> + struct dax_device *dax_dev;
  ^ space instead of tabs?
> + void *saddr;
> + sector_t start;
> + size_t len;
> +
> + if (em->block_start == EXTENT_MAP_HOLE) {
> + memset(daddr, 0, PAGE_SIZE);
> + } else {
> + dax_dev = fs_dax_get_by_bdev(em->bdev);
> + start = (get_start_sect(em->bdev) << 9) + (em->block_start + (pos - em->start));
> + len = dax_direct_access(dax_dev, PHYS_PFN(start), 1, &saddr, NULL);
> + memcpy(daddr, saddr, PAGE_SIZE);
> + }
> + free_extent_map(em);
> +
> + return 0;
> +}
> +
> +

copy_extent_page() always returns 0, why not make it void?
Plus a nit: double newline.

> +ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from)
> +{
> + ssize_t ret, done = 0, count = iov_iter_count(from);
> + struct inode *inode = file_inode(iocb->ki_filp);
  ^ again spaces vs tabs.
> + u64 pos = iocb->ki_pos;
> + u64 start = round_down(pos, PAGE_SIZE);
> + u64 end = round_up(pos + count, PAGE_SIZE);
> + struct extent_state *cached_state = NULL;
> + struct extent_changeset *data_reserved = NULL;
> + struct extent_map *first = NULL, *last = NULL;
> +
> + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, end - start);
> + if (ret < 0)
> + return ret;
> +
> + /* Grab a reference of the first extent to copy data */
> + if (start < pos) {
> + first = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, end - start, 0);
> + if (IS_ERR(first)) {
> + ret = PTR_ERR(first);
> + goto out2;
> + }
> + }

You're using 'end - start' at least twice here, maybe you could move 'len' out of the loop and use it for btrfs_delalloc_reserve_space() and btrfs_get_extent() as well.
> +
> + /* Grab a reference of the last extent to copy data */
> + if (pos + count < end) {
> + last = btrfs_get_extent(BTRFS_I(inode), NULL, 0, end - PAGE_SIZE, PAGE_SIZE, 0);
> + if (IS_ERR(last)) {
> + ret = PTR_ERR(last);
> + goto out2;
> + }
> + }
> +
> + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
> + while (done < count) {
> + struct extent_map *em;
> + struct dax_device *dax_dev;
> + int offset = pos & (PAGE_SIZE - 1);
> + u64 estart = round_down(pos, PAGE_SIZE);
> + u64 elen = end - estart;
> + size_t len = count - done;
> + sector_t dstart;
> + void *daddr;
> + ssize_t maplen;
> +
> + /* Read the current extent */
> + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, estart, elen, 0);

Space again.

> + if (IS_ERR(em)) {
> + ret = PTR_ERR(em);
> + goto out;
> + }
> +
> + /* Get a new extent */
> + ret = btrfs_get_extent_map_write(&em, NULL, inode, estart, elen);
> + if (ret < 0)
> + goto out;
> +
> + dax_dev = fs_dax_get_by_bdev(em->bdev);
> + /* Calculate start address start of destination extent */
> + dstart = (get_start_sect(em->bdev) << 9) + em->block_start;
> + maplen = dax_direct_access(dax_dev, PHYS_PFN(dstart),
> + PHYS_PFN(em->len), &daddr, NULL);
> +
> + /* Copy front of extent page */
> + if (offset)
> + ret = copy_extent_page(first, daddr, estart);
> +
> + /* Copy end of extent page */
> + if ((pos + len > estart + PAGE_SIZE) && (pos + len < em->start + em->len))
> + ret = copy_extent_page(last, daddr + em->len - PAGE_SIZE, em->start + em->len - PAGE_SIZE);
> +
> + /* Copy the data from the iter */
> + maplen = PFN_PHYS(maplen);
> + maplen -= offset;
> + ret = dax_copy_from_iter(dax_dev, dstart, daddr + offset, maplen, from);
> + if (ret < 0)
> + goto out;
> + pos += ret;
> + done += ret;
> + }
> +out:

out_unlock?
> + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
> + if (done) {
> + btrfs_update_ordered_extent(inode, start,
> + end - start, true);
> + iocb->ki_pos += done;
> + if (iocb->ki_pos > i_size_read(inode))
> + i_size_write(inode, iocb->ki_pos);
> + }
> +
> + btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
> +out2:

out?

> + if (count - done > 0)
> + btrfs_delalloc_release_space(inode, data_reserved, pos,
> + count - done, true);
> + extent_changeset_free(data_reserved);
> + return done ? done : ret;
> +
> +}
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index ef6ed93f44d1..29a3b12e6660 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1964,7 +1964,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
> if (sync)
> atomic_inc(&BTRFS_I(inode)->sync_writers);
>
> - if (iocb->ki_flags & IOCB_DIRECT) {
> + if (IS_DAX(inode)) {
> + num_written = btrfs_file_dax_write(iocb, from);
> + } else if (iocb->ki_flags & IOCB_DIRECT) {
> num_written = __btrfs_direct_write(iocb, from);
> } else {
> num_written = btrfs_buffered_write(iocb, from);
>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a0d296b0d826..d91ff283a966 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3693,6 +3693,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err);
 #ifdef CONFIG_FS_DAX
 /* dax.c */
 ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to);
+ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from);
 #endif /* CONFIG_FS_DAX */
 
 static inline int is_fstree(u64 rootid)
diff --git a/fs/btrfs/dax.c b/fs/btrfs/dax.c
index 5a297674adec..4000259a426c 100644
--- a/fs/btrfs/dax.c
+++ b/fs/btrfs/dax.c
@@ -2,6 +2,7 @@
 #include <linux/uio.h>
 #include "ctree.h"
 #include "btrfs_inode.h"
+#include "extent_io.h"
 
 static ssize_t em_dax_rw(struct inode *inode, struct extent_map *em, u64 pos,
 u64 len, struct iov_iter *iter)
@@ -71,3 +72,123 @@ ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to)
 return done ? done : ret;
 }
+static int copy_extent_page(struct extent_map *em, void *daddr, u64 pos)
+{
+ struct dax_device *dax_dev;
+ void *saddr;
+ sector_t start;
+ size_t len;
+
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ memset(daddr, 0, PAGE_SIZE);
+ } else {
+ dax_dev = fs_dax_get_by_bdev(em->bdev);
+ start = (get_start_sect(em->bdev) << 9) + (em->block_start + (pos - em->start));
+ len = dax_direct_access(dax_dev, PHYS_PFN(start), 1, &saddr, NULL);
+ memcpy(daddr, saddr, PAGE_SIZE);
+ }
+ free_extent_map(em);
+
+ return 0;
+}
+
+
+ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ ssize_t ret, done = 0, count = iov_iter_count(from);
+ struct inode *inode = file_inode(iocb->ki_filp);
+ u64 pos = iocb->ki_pos;
+ u64 start = round_down(pos, PAGE_SIZE);
+ u64 end = round_up(pos + count, PAGE_SIZE);
+ struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
+ struct extent_map *first = NULL, *last = NULL;
+
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, end - start);
+ if (ret < 0)
+ return ret;
+
+ /* Grab a reference of the first extent to copy data */
+ if (start < pos) {
+ first = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, end - start, 0);
+ if (IS_ERR(first)) {
+ ret = PTR_ERR(first);
+ goto out2;
+ }
+ }
+
+ /* Grab a reference of the last extent to copy data */
+ if (pos + count < end) {
+ last = btrfs_get_extent(BTRFS_I(inode), NULL, 0, end - PAGE_SIZE, PAGE_SIZE, 0);
+ if (IS_ERR(last)) {
+ ret = PTR_ERR(last);
+ goto out2;
+ }
+ }
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
+ while (done < count) {
+ struct extent_map *em;
+ struct dax_device *dax_dev;
+ int offset = pos & (PAGE_SIZE - 1);
+ u64 estart = round_down(pos, PAGE_SIZE);
+ u64 elen = end - estart;
+ size_t len = count - done;
+ sector_t dstart;
+ void *daddr;
+ ssize_t maplen;
+
+ /* Read the current extent */
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, estart, elen, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ /* Get a new extent */
+ ret = btrfs_get_extent_map_write(&em, NULL, inode, estart, elen);
+ if (ret < 0)
+ goto out;
+
+ dax_dev = fs_dax_get_by_bdev(em->bdev);
+ /* Calculate start address start of destination extent */
+ dstart = (get_start_sect(em->bdev) << 9) + em->block_start;
+ maplen = dax_direct_access(dax_dev, PHYS_PFN(dstart),
+ PHYS_PFN(em->len), &daddr, NULL);
+
+ /* Copy front of extent page */
+ if (offset)
+ ret = copy_extent_page(first, daddr, estart);
+
+ /* Copy end of extent page */
+ if ((pos + len > estart + PAGE_SIZE) && (pos + len < em->start + em->len))
+ ret = copy_extent_page(last, daddr + em->len - PAGE_SIZE, em->start + em->len - PAGE_SIZE);
+
+ /* Copy the data from the iter */
+ maplen = PFN_PHYS(maplen);
+ maplen -= offset;
+ ret = dax_copy_from_iter(dax_dev, dstart, daddr + offset, maplen, from);
+ if (ret < 0)
+ goto out;
+ pos += ret;
+ done += ret;
+ }
+out:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
+ if (done) {
+ btrfs_update_ordered_extent(inode, start,
+ end - start, true);
+ iocb->ki_pos += done;
+ if (iocb->ki_pos > i_size_read(inode))
+ i_size_write(inode, iocb->ki_pos);
+ }
+
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
+out2:
+ if (count - done > 0)
+ btrfs_delalloc_release_space(inode, data_reserved, pos,
+ count - done, true);
+ extent_changeset_free(data_reserved);
+ return done ? done : ret;
+
+}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ef6ed93f44d1..29a3b12e6660 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1964,7 +1964,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 if (sync)
 atomic_inc(&BTRFS_I(inode)->sync_writers);
 
- if (iocb->ki_flags & IOCB_DIRECT) {
+ if (IS_DAX(inode)) {
+ num_written = btrfs_file_dax_write(iocb, from);
+ } else if (iocb->ki_flags & IOCB_DIRECT) {
 num_written = __btrfs_direct_write(iocb, from);
 } else {
 num_written = btrfs_buffered_write(iocb, from);