Message ID | 20250217093207.3769550-2-hch@lst.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/4] xfs: reduce context switches for synchronous buffered I/O | expand |
On Mon, Feb 17, 2025 at 10:31:26AM +0100, Christoph Hellwig wrote: > Currently all metadata I/O completions happen in the m_buf_workqueue > workqueue. But for synchronous I/O (i.e. all buffer reads) there is no > need for that, as there always is a called in process context that is > waiting for the I/O. Factor out the guts of xfs_buf_ioend into a > separate helper and call it from xfs_buf_iowait to avoid a double > an extra context switch to the workqueue. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/xfs_buf.c | 42 ++++++++++++++++++++++++++---------------- > 1 file changed, 26 insertions(+), 16 deletions(-) > > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c > index 15bb790359f8..050f2c2f6a40 100644 > --- a/fs/xfs/xfs_buf.c > +++ b/fs/xfs/xfs_buf.c > @@ -1345,6 +1345,7 @@ xfs_buf_ioend_handle_error( > resubmit: > xfs_buf_ioerror(bp, 0); > bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); > + reinit_completion(&bp->b_iowait); > xfs_buf_submit(bp); > return true; > out_stale: > @@ -1355,8 +1356,8 @@ xfs_buf_ioend_handle_error( > return false; > } > > -static void > -xfs_buf_ioend( > +static bool > +__xfs_buf_ioend( What does the return value here indicate? I /think/ it's true if the IO is really complete, or false if we're going to retry? But maybe it actually means true if the bp reference is still live? A comment would be really helpful here. --D > struct xfs_buf *bp) > { > trace_xfs_buf_iodone(bp, _RET_IP_); > @@ -1376,7 +1377,7 @@ xfs_buf_ioend( > } > > if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) > - return; > + return false; > > /* clear the retry state */ > bp->b_last_error = 0; > @@ -1397,7 +1398,15 @@ xfs_buf_ioend( > > bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | > _XBF_LOGRECOVERY); > + return true; > +} > > +static void > +xfs_buf_ioend( > + struct xfs_buf *bp) > +{ > + if (!__xfs_buf_ioend(bp)) > + return; > if (bp->b_flags & XBF_ASYNC) > xfs_buf_relse(bp); > else > @@ -1411,15 +1420,8 @@ xfs_buf_ioend_work( > struct xfs_buf *bp = > container_of(work, struct xfs_buf, b_ioend_work); > > - xfs_buf_ioend(bp); > -} > - > -static void > -xfs_buf_ioend_async( > - struct xfs_buf *bp) > -{ > - INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); > - queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); > + if (__xfs_buf_ioend(bp)) > + xfs_buf_relse(bp); > } > > void > @@ -1491,7 +1493,13 @@ xfs_buf_bio_end_io( > XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) > xfs_buf_ioerror(bp, -EIO); > > - xfs_buf_ioend_async(bp); > + if (bp->b_flags & XBF_ASYNC) { > + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); > + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); > + } else { > + complete(&bp->b_iowait); > + } > + > bio_put(bio); > } > > @@ -1568,9 +1576,11 @@ xfs_buf_iowait( > { > ASSERT(!(bp->b_flags & XBF_ASYNC)); > > - trace_xfs_buf_iowait(bp, _RET_IP_); > - wait_for_completion(&bp->b_iowait); > - trace_xfs_buf_iowait_done(bp, _RET_IP_); > + do { > + trace_xfs_buf_iowait(bp, _RET_IP_); > + wait_for_completion(&bp->b_iowait); > + trace_xfs_buf_iowait_done(bp, _RET_IP_); > + } while (!__xfs_buf_ioend(bp)); > > return bp->b_error; > } > -- > 2.45.2 > >
On Tue, Feb 18, 2025 at 12:05:51PM -0800, Darrick J. Wong wrote: > What does the return value here indicate? I /think/ it's true if the IO > is really complete, or false if we're going to retry? Yes. > But maybe it > actually means true if the bp reference is still live? A comment would > be really helpful here. Sure, I'll add one.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15bb790359f8..050f2c2f6a40 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1345,6 +1345,7 @@ xfs_buf_ioend_handle_error( resubmit: xfs_buf_ioerror(bp, 0); bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + reinit_completion(&bp->b_iowait); xfs_buf_submit(bp); return true; out_stale: @@ -1355,8 +1356,8 @@ xfs_buf_ioend_handle_error( return false; } -static void -xfs_buf_ioend( +static bool +__xfs_buf_ioend( struct xfs_buf *bp) { trace_xfs_buf_iodone(bp, _RET_IP_); @@ -1376,7 +1377,7 @@ xfs_buf_ioend( } if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) - return; + return false; /* clear the retry state */ bp->b_last_error = 0; @@ -1397,7 +1398,15 @@ xfs_buf_ioend( bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | _XBF_LOGRECOVERY); + return true; +} +static void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + if (!__xfs_buf_ioend(bp)) + return; if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); else @@ -1411,15 +1420,8 @@ xfs_buf_ioend_work( struct xfs_buf *bp = container_of(work, struct xfs_buf, b_ioend_work); - xfs_buf_ioend(bp); -} - -static void -xfs_buf_ioend_async( - struct xfs_buf *bp) -{ - INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + if (__xfs_buf_ioend(bp)) + xfs_buf_relse(bp); } void @@ -1491,7 +1493,13 @@ xfs_buf_bio_end_io( XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend_async(bp); + if (bp->b_flags & XBF_ASYNC) { + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); + } else { + complete(&bp->b_iowait); + } + bio_put(bio); } @@ -1568,9 +1576,11 @@ xfs_buf_iowait( { ASSERT(!(bp->b_flags & XBF_ASYNC)); - trace_xfs_buf_iowait(bp, _RET_IP_); - wait_for_completion(&bp->b_iowait); - trace_xfs_buf_iowait_done(bp, _RET_IP_); + do { + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + } while (!__xfs_buf_ioend(bp)); return bp->b_error; }
Currently all metadata I/O completions happen in the m_buf_workqueue workqueue. But for synchronous I/O (i.e. all buffer reads) there is no need for that, as there always is a called in process context that is waiting for the I/O. Factor out the guts of xfs_buf_ioend into a separate helper and call it from xfs_buf_iowait to avoid a double an extra context switch to the workqueue. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/xfs/xfs_buf.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-)