From patchwork Wed Jun 22 15:27:15 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Christoph Hellwig X-Patchwork-Id: 9193185 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 3F779601C0 for ; Wed, 22 Jun 2016 15:44:12 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2F43E283FD for ; Wed, 22 Jun 2016 15:44:12 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 240B52840F; Wed, 22 Jun 2016 15:44:12 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-6.9 required=2.0 tests=BAYES_00,RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 416FC283FD for ; Wed, 22 Jun 2016 15:44:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752354AbcFVPoJ (ORCPT ); Wed, 22 Jun 2016 11:44:09 -0400 Received: from bombadil.infradead.org ([198.137.202.9]:52875 "EHLO bombadil.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751146AbcFVPoI (ORCPT ); Wed, 22 Jun 2016 11:44:08 -0400 Received: from [83.175.99.196] (helo=localhost) by bombadil.infradead.org with esmtpsa (Exim 4.85_2 #1 (Red Hat Linux)) id 1bFk41-0006iS-KQ; Wed, 22 Jun 2016 15:27:38 +0000 From: Christoph Hellwig To: xfs@oss.sgi.com Cc: linux-nvdimm@ml01.01.org, linux-fsdevel@vger.kernel.org Subject: [PATCH 7/8] xfs: split direct I/O and DAX path Date: Wed, 22 Jun 2016 17:27:15 +0200 Message-Id: <1466609236-23801-8-git-send-email-hch@lst.de> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1466609236-23801-1-git-send-email-hch@lst.de> References: <1466609236-23801-1-git-send-email-hch@lst.de> MIME-Version: 1.0 X-SRS-Rewrite: SMTP reverse-path rewritten from by bombadil.infradead.org. See http://www.infradead.org/rpr.html Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP So far the DAX code overloaded the direct I/O code path. There is very little in common between the two, and untangling them allows to clean up both variants. As a Ń•ide effect we also get separate trace points for both I/O types. Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_file.c | 139 ++++++++++++++++++++++++++++++++++++++++++----------- fs/xfs/xfs_trace.h | 2 + 2 files changed, 112 insertions(+), 29 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 24b267d..0e74325 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -262,13 +262,11 @@ xfs_file_dio_aio_read( else target = ip->i_mount->m_ddev_targp; - if (!IS_DAX(inode)) { - /* DIO must be aligned to device logical sector size */ - if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { - if (iocb->ki_pos == isize) - return 0; - return -EINVAL; - } + /* DIO must be aligned to device logical sector size */ + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { + if (iocb->ki_pos == isize) + return 0; + return -EINVAL; } /* @@ -317,13 +315,37 @@ xfs_file_dio_aio_read( } data = *to; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - NULL, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, NULL, NULL, 0); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(to, ret); } + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + + file_accessed(iocb->ki_filp); + return ret; +} + +STATIC ssize_t +xfs_file_dax_read( + struct kiocb *iocb, + struct iov_iter *to) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct iov_iter data = *to; + size_t count = iov_iter_count(to); + ssize_t ret = 0; + + trace_xfs_file_dax_read(ip, count, iocb->ki_pos); + + if (!count) + return 0; /* skip atime */ + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); if (ret > 0) { iocb->ki_pos += ret; iov_iter_advance(to, ret); @@ -356,7 +378,8 @@ xfs_file_read_iter( struct kiocb *iocb, struct iov_iter *to) { - struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_mount *mp = XFS_I(inode)->i_mount; ssize_t ret = 0; XFS_STATS_INC(mp, xs_read_calls); @@ -364,7 +387,9 @@ xfs_file_read_iter( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (iocb->ki_flags & IOCB_DIRECT) + if (IS_DAX(inode)) + ret = xfs_file_dax_read(iocb, to); + else if (iocb->ki_flags & IOCB_DIRECT) ret = xfs_file_dio_aio_read(iocb, to); else ret = xfs_file_buffered_aio_read(iocb, to); @@ -586,8 +611,7 @@ xfs_file_dio_aio_write( mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ - if (!IS_DAX(inode) && - ((iocb->ki_pos | count) & target->bt_logical_sectormask)) + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ @@ -656,14 +680,9 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); data = *from; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, xfs_end_io_direct_write, - NULL, DIO_ASYNC_EXTEND); - } + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, xfs_end_io_direct_write, + NULL, DIO_ASYNC_EXTEND); /* see generic_file_direct_write() for why this is necessary */ if (mapping->nrpages) { @@ -680,10 +699,70 @@ out: xfs_rw_iunlock(ip, iolock); /* - * No fallback to buffered IO on errors for XFS. DAX can result in - * partial writes, but direct IO will either complete fully or fail. + * No fallback to buffered IO on errors for XFS, direct IO will either + * complete fully or fail. + */ + ASSERT(ret < 0 || ret == count); + return ret; +} + +STATIC ssize_t +xfs_file_dax_write( + struct kiocb *iocb, + struct iov_iter *from) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + int unaligned_io = 0; + int iolock; + struct iov_iter data; + + /* "unaligned" here means not aligned to a filesystem block */ + if ((iocb->ki_pos & mp->m_blockmask) || + ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) { + unaligned_io = 1; + iolock = XFS_IOLOCK_EXCL; + } else if (mapping->nrpages) { + iolock = XFS_IOLOCK_EXCL; + } else { + iolock = XFS_IOLOCK_SHARED; + } + xfs_rw_ilock(ip, iolock); + + ret = xfs_file_aio_write_checks(iocb, from, &iolock); + if (ret) + goto out; + + /* + * Yes, even DAX files can have page cache attached to them: A zeroed + * page is inserted into the pagecache when we have to serve a write + * fault on a hole. It should never be dirtied and can simply be + * dropped from the pagecache once we get real data for the page. */ - ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip))); + if (mapping->nrpages) { + ret = invalidate_inode_pages2(mapping); + WARN_ON_ONCE(ret); + } + + if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) { + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); + + data = *from; + ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, + xfs_end_io_direct_write, 0); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(from, ret); + } +out: + xfs_rw_iunlock(ip, iolock); return ret; } @@ -765,7 +844,9 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) + if (IS_DAX(inode)) + ret = xfs_file_dax_write(iocb, from); + else if (iocb->ki_flags & IOCB_DIRECT) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2504f94..1451690 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name, \ TP_ARGS(ip, count, offset)) DEFINE_RW_EVENT(xfs_file_buffered_read); DEFINE_RW_EVENT(xfs_file_direct_read); +DEFINE_RW_EVENT(xfs_file_dax_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); +DEFINE_RW_EVENT(xfs_file_dax_write); DEFINE_RW_EVENT(xfs_file_splice_read); DECLARE_EVENT_CLASS(xfs_page_class,