Message ID | 20240816-vfs-misc-dio-v1-1-80fe21a2c710@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | inode: remove __I_DIO_WAKEUP | expand |
On Fri, Aug 16, 2024 at 04:35:52PM +0200, Christian Brauner wrote: > Afaict, we can just rely on inode->i_dio_count for waiting instead of > this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio > and xfstests dio tests. > > Signed-off-by: Christian Brauner <brauner@kernel.org> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Thanks, Josef
On Fri, 2024-08-16 at 16:35 +0200, Christian Brauner wrote: > Afaict, we can just rely on inode->i_dio_count for waiting instead of > this awkward indirection through __I_DIO_WAKEUP. This survives LTP > dio > and xfstests dio tests. > > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > --- > fs/inode.c | 23 +++++++++++------------ > fs/netfs/locking.c | 18 +++--------------- > include/linux/fs.h | 9 ++++----- > 3 files changed, 18 insertions(+), 32 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 7a4e27606fca..46bf05d826db 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable); > /* > * Direct i/o helper functions > */ > -static void __inode_dio_wait(struct inode *inode) > +bool inode_dio_finished(const struct inode *inode) > { > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, > __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - do { > - prepare_to_wait(wq, &q.wq_entry, > TASK_UNINTERRUPTIBLE); > - if (atomic_read(&inode->i_dio_count)) > - schedule(); > - } while (atomic_read(&inode->i_dio_count)); > - finish_wait(wq, &q.wq_entry); > + smp_mb__before_atomic(); > + return atomic_read(&inode->i_dio_count) == 0; > } > +EXPORT_SYMBOL(inode_dio_finished); > > /** > * inode_dio_wait - wait for outstanding DIO requests to finish > @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode > *inode) > */ > void inode_dio_wait(struct inode *inode) > { > - if (atomic_read(&inode->i_dio_count)) > - __inode_dio_wait(inode); > + wait_var_event(&inode->i_dio_count, inode_dio_finished); > } > EXPORT_SYMBOL(inode_dio_wait); > > +void inode_dio_wait_interruptible(struct inode *inode) > +{ > + wait_var_event_interruptible(&inode->i_dio_count, > inode_dio_finished); > +} > +EXPORT_SYMBOL(inode_dio_wait_interruptible); > + > /* > * inode_set_flags - atomically set some inode flags > * > diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c > index 75dc52a49b3a..c2cfdda85230 100644 > --- a/fs/netfs/locking.c > +++ b/fs/netfs/locking.c > @@ -21,23 +21,11 @@ > */ > static int inode_dio_wait_interruptible(struct inode *inode) > { > - if (!atomic_read(&inode->i_dio_count)) > + if (inode_dio_finished(inode)) > return 0; > > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, > __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - for (;;) { > - prepare_to_wait(wq, &q.wq_entry, > TASK_INTERRUPTIBLE); > - if (!atomic_read(&inode->i_dio_count)) > - break; > - if (signal_pending(current)) > - break; > - schedule(); > - } > - finish_wait(wq, &q.wq_entry); > - > - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; > + inode_dio_wait_interruptible(inode); > + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; > } > > /* Call with exclusively locked inode->i_rwsem */ > diff --git a/include/linux/fs.h b/include/linux/fs.h > index b6f2e2a1e513..f744cd918259 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2380,8 +2380,6 @@ static inline void kiocb_clone(struct kiocb > *kiocb, struct kiocb *kiocb_src, > * > * I_REFERENCED Marks the inode as recently > references on the LRU list. > * > - * I_DIO_WAKEUP Never set. Only used as a key for > wait_on_bit(). > - * > * I_WB_SWITCH Cgroup bdi_writeback switching in progress. > Used to > * synchronize competing switching instances > and to tell > * wb stat updates to grab the i_pages lock. > See > @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb > *kiocb, struct kiocb *kiocb_src, > #define __I_SYNC 7 > #define I_SYNC (1 << __I_SYNC) > #define I_REFERENCED (1 << 8) > -#define __I_DIO_WAKEUP 9 > -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) > #define I_LINKABLE (1 << 10) > #define I_DIRTY_TIME (1 << 11) > #define I_WB_SWITCH (1 << 13) > @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct > kiocb *iocb, > #endif > > void inode_dio_wait(struct inode *inode); > +void inode_dio_wait_interruptible(struct inode *inode); > > /** > * inode_dio_begin - signal start of a direct I/O requests > @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode); > static inline void inode_dio_begin(struct inode *inode) > { > atomic_inc(&inode->i_dio_count); > + smp_mb__after_atomic(); > } > > /** > @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode > *inode) > */ > static inline void inode_dio_end(struct inode *inode) > { > + smp_mb__before_atomic(); > if (atomic_dec_and_test(&inode->i_dio_count)) > - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); > + wake_up_var(&inode->i_dio_count); > } > > extern void inode_set_flags(struct inode *inode, unsigned int flags, > > --- > base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9 > change-id: 20240816-vfs-misc-dio-5cb07eaae155 > Nice cleanup! Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc'ing Christoph as he did the original (bd5fe6c5eb9c5). Christian Brauner <brauner@kernel.org> wrote: > Afaict, we can just rely on inode->i_dio_count for waiting instead of > this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio > and xfstests dio tests. > > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > --- > fs/inode.c | 23 +++++++++++------------ > fs/netfs/locking.c | 18 +++--------------- > include/linux/fs.h | 9 ++++----- > 3 files changed, 18 insertions(+), 32 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 7a4e27606fca..46bf05d826db 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable); > /* > * Direct i/o helper functions > */ > -static void __inode_dio_wait(struct inode *inode) > +bool inode_dio_finished(const struct inode *inode) > { > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - do { > - prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); > - if (atomic_read(&inode->i_dio_count)) > - schedule(); > - } while (atomic_read(&inode->i_dio_count)); > - finish_wait(wq, &q.wq_entry); > + smp_mb__before_atomic(); > + return atomic_read(&inode->i_dio_count) == 0; Can atomic_read_aquire() be used here? > } > +EXPORT_SYMBOL(inode_dio_finished); > > /** > * inode_dio_wait - wait for outstanding DIO requests to finish > @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode) > */ > void inode_dio_wait(struct inode *inode) > { > - if (atomic_read(&inode->i_dio_count)) > - __inode_dio_wait(inode); > + wait_var_event(&inode->i_dio_count, inode_dio_finished); > } > EXPORT_SYMBOL(inode_dio_wait); > > +void inode_dio_wait_interruptible(struct inode *inode) > +{ > + wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished); > +} > +EXPORT_SYMBOL(inode_dio_wait_interruptible); > + > /* > * inode_set_flags - atomically set some inode flags > * > diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c > index 75dc52a49b3a..c2cfdda85230 100644 > --- a/fs/netfs/locking.c > +++ b/fs/netfs/locking.c > @@ -21,23 +21,11 @@ > */ > static int inode_dio_wait_interruptible(struct inode *inode) > { > - if (!atomic_read(&inode->i_dio_count)) > + if (inode_dio_finished(inode)) > return 0; > > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - for (;;) { > - prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); > - if (!atomic_read(&inode->i_dio_count)) > - break; > - if (signal_pending(current)) > - break; > - schedule(); > - } > - finish_wait(wq, &q.wq_entry); > - > - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; > + inode_dio_wait_interruptible(inode); > + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; > } > > /* Call with exclusively locked inode->i_rwsem */ > diff --git a/include/linux/fs.h b/include/linux/fs.h > index b6f2e2a1e513..f744cd918259 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2380,8 +2380,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, > * > * I_REFERENCED Marks the inode as recently references on the LRU list. > * > - * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). > - * > * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to > * synchronize competing switching instances and to tell > * wb stat updates to grab the i_pages lock. See > @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, > #define __I_SYNC 7 > #define I_SYNC (1 << __I_SYNC) > #define I_REFERENCED (1 << 8) > -#define __I_DIO_WAKEUP 9 > -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) > #define I_LINKABLE (1 << 10) > #define I_DIRTY_TIME (1 << 11) > #define I_WB_SWITCH (1 << 13) > @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, > #endif > > void inode_dio_wait(struct inode *inode); > +void inode_dio_wait_interruptible(struct inode *inode); > > /** > * inode_dio_begin - signal start of a direct I/O requests > @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode); > static inline void inode_dio_begin(struct inode *inode) > { > atomic_inc(&inode->i_dio_count); > + smp_mb__after_atomic(); > } > > /** > @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode) > */ > static inline void inode_dio_end(struct inode *inode) > { > + smp_mb__before_atomic(); > if (atomic_dec_and_test(&inode->i_dio_count)) Doesn't atomic_dec_and_test() imply full barriering? See atomic_t.txt, "ORDERING": - RMW operations that have a return value are fully ordered; > - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); > + wake_up_var(&inode->i_dio_count); > } > > extern void inode_set_flags(struct inode *inode, unsigned int flags, > > --- > base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9 > change-id: 20240816-vfs-misc-dio-5cb07eaae155 >
On Fri, Aug 16, 2024 at 05:27:05PM +0100, David Howells wrote: > Christian Brauner <brauner@kernel.org> wrote: > > > Afaict, we can just rely on inode->i_dio_count for waiting instead of > > this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio > > and xfstests dio tests. > > > > Signed-off-by: Christian Brauner <brauner@kernel.org> > > --- > > --- > > fs/inode.c | 23 +++++++++++------------ > > fs/netfs/locking.c | 18 +++--------------- > > include/linux/fs.h | 9 ++++----- > > 3 files changed, 18 insertions(+), 32 deletions(-) > > > > diff --git a/fs/inode.c b/fs/inode.c > > index 7a4e27606fca..46bf05d826db 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable); > > /* > > * Direct i/o helper functions > > */ > > -static void __inode_dio_wait(struct inode *inode) > > +bool inode_dio_finished(const struct inode *inode) > > { > > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); > > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > > - > > - do { > > - prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); > > - if (atomic_read(&inode->i_dio_count)) > > - schedule(); > > - } while (atomic_read(&inode->i_dio_count)); > > - finish_wait(wq, &q.wq_entry); > > + smp_mb__before_atomic(); > > + return atomic_read(&inode->i_dio_count) == 0; > > Can atomic_read_aquire() be used here? > I think the key point is that smp_mb__before_atomic is not usable with atomic_read and atomic_set -- these merely expand to regular movs, not providing a full fence (in contrast to lock-prefixed instructions on amd64). As for what ordering is needed here (and in particular would acquire do the job), I don't know yet. > > static inline void inode_dio_end(struct inode *inode) > > { > > + smp_mb__before_atomic(); > > if (atomic_dec_and_test(&inode->i_dio_count)) > > Doesn't atomic_dec_and_test() imply full barriering? See atomic_t.txt, > "ORDERING": > > - RMW operations that have a return value are fully ordered; > smp_mb__before_atomic is indeed redundant here. atomic_dec_and_test and its variants are used for refcounting relying on the implicitly provided ordering (e.g., see fput)
Mateusz Guzik <mjguzik@gmail.com> wrote:
> ... atomic_read and atomic_set -- these merely expand to regular movs, ...
Ah, no, that's not necessarily true. See arch/parisc/include/asm/atomic.h.
David
On Sat, Aug 17, 2024 at 9:25 AM David Howells <dhowells@redhat.com> wrote: > > Mateusz Guzik <mjguzik@gmail.com> wrote: > > > ... atomic_read and atomic_set -- these merely expand to regular movs, ... > > Ah, no, that's not necessarily true. See arch/parisc/include/asm/atomic.h. > In that context I was talking about amd64. atomic_read/set do happen to provide some fencing on some archs, but there are no guarantees as far as LKMM is concerned -- for example atomic_read is explicitly documented to only provide relaxed ordering (aka no guarantees). In particular on amd64 the full memory barrier normally coming with lock-prefixed instructions (and xchg with a memory operand) is not present here because this merely expands to a regular mov, making the code bogus if such a fence is indeed required in this code.
On Fri, Aug 16, 2024 at 04:35:52PM +0200, Christian Brauner wrote: > Afaict, we can just rely on inode->i_dio_count for waiting instead of > this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio > and xfstests dio tests. > > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > --- > fs/inode.c | 23 +++++++++++------------ > fs/netfs/locking.c | 18 +++--------------- > include/linux/fs.h | 9 ++++----- > 3 files changed, 18 insertions(+), 32 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 7a4e27606fca..46bf05d826db 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable); > /* > * Direct i/o helper functions > */ > -static void __inode_dio_wait(struct inode *inode) > +bool inode_dio_finished(const struct inode *inode) > { > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - do { > - prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); > - if (atomic_read(&inode->i_dio_count)) > - schedule(); > - } while (atomic_read(&inode->i_dio_count)); > - finish_wait(wq, &q.wq_entry); > + smp_mb__before_atomic(); > + return atomic_read(&inode->i_dio_count) == 0; > } > +EXPORT_SYMBOL(inode_dio_finished); What is the memory barrier here for? i_dio_count is not a reference count - there are no dependent inode object changes before/after it changes that it needs to be ordered against, so I'm not sure what the purpose of this memory barrier is supposed to be... Memory barriers -always- need a comment describing the race condition they are avoiding. > /** > * inode_dio_wait - wait for outstanding DIO requests to finish > @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode) > */ > void inode_dio_wait(struct inode *inode) > { > - if (atomic_read(&inode->i_dio_count)) > - __inode_dio_wait(inode); > + wait_var_event(&inode->i_dio_count, inode_dio_finished); > } > EXPORT_SYMBOL(inode_dio_wait); > > +void inode_dio_wait_interruptible(struct inode *inode) > +{ > + wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished); > +} > +EXPORT_SYMBOL(inode_dio_wait_interruptible); Keep in mind that the prepare_to_wait() call inside wait_var_event_interruptible() takes the waitqueue head lock before checking the condition. This provides the necessary memory barriers to prevent wait/wakeup races checking the inode_dio_finished() state. Hence, AFAICT, no memory barriers are needed in inode_dio_finished() at all.... > /* > * inode_set_flags - atomically set some inode flags > * > diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c > index 75dc52a49b3a..c2cfdda85230 100644 > --- a/fs/netfs/locking.c > +++ b/fs/netfs/locking.c > @@ -21,23 +21,11 @@ > */ > static int inode_dio_wait_interruptible(struct inode *inode) > { > - if (!atomic_read(&inode->i_dio_count)) > + if (inode_dio_finished(inode)) > return 0; > > - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); > - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); > - > - for (;;) { > - prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); > - if (!atomic_read(&inode->i_dio_count)) > - break; > - if (signal_pending(current)) > - break; > - schedule(); > - } > - finish_wait(wq, &q.wq_entry); > - > - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; > + inode_dio_wait_interruptible(inode); > + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; That looks broken. We have a private static function calling an exported function of the same name. I suspect that this static function needs to be named "netfs_dio_wait_interruptible()".... > @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, > #define __I_SYNC 7 > #define I_SYNC (1 << __I_SYNC) > #define I_REFERENCED (1 << 8) > -#define __I_DIO_WAKEUP 9 > -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) > #define I_LINKABLE (1 << 10) > #define I_DIRTY_TIME (1 << 11) > #define I_WB_SWITCH (1 << 13) > @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, > #endif > > void inode_dio_wait(struct inode *inode); > +void inode_dio_wait_interruptible(struct inode *inode); > > /** > * inode_dio_begin - signal start of a direct I/O requests > @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode); > static inline void inode_dio_begin(struct inode *inode) > { > atomic_inc(&inode->i_dio_count); > + smp_mb__after_atomic(); > } Again I have no idea waht this barrier is doing. Why does this operation need quasi-release semantics? > /** > @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode) > */ > static inline void inode_dio_end(struct inode *inode) > { > + smp_mb__before_atomic(); > if (atomic_dec_and_test(&inode->i_dio_count)) > - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); > + wake_up_var(&inode->i_dio_count); > } atomic_dec_and_test() is a RMW atomic operation with a return value, so has has fully ordered semanitcs according to Documentation/atomic_t.txt: - RMW operations that have a return value are fully ordered. [...] Fully ordered primitives are ordered against everything prior and everything subsequent. Therefore a fully ordered primitive is like having an smp_mb() before and an smp_mb() after the primitive. So there's never a need for explicit barriers before/after an atomic_dec_and_test() operation, right? -Dave.
> > - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; > > + inode_dio_wait_interruptible(inode); > > + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; > > That looks broken. We have a private static function calling an > exported function of the same name. I suspect that this static > function needs to be named "netfs_dio_wait_interruptible()".... Yep. I already fixed that in the tree. > atomic_dec_and_test() is a RMW atomic operation with a return value, > so has has fully ordered semanitcs according to > Documentation/atomic_t.txt: > > - RMW operations that have a return value are fully ordered. > [...] > Fully ordered primitives are ordered against everything prior and everything > subsequent. Therefore a fully ordered primitive is like having an smp_mb() > before and an smp_mb() after the primitive. > > So there's never a need for explicit barriers before/after an > atomic_dec_and_test() operation, right? Thanks for the comments on the barrires. Frankly, I added the barriers because the internal implementation of the wait_*() functions tend to confuse me as they require memory barriers to ensure that wait_var_event() sees the condition fulfilled or waitqueue_active() sees the waiter. And apparently I'm not the only one with that confusion around these apis (see Neil's series).
diff --git a/fs/inode.c b/fs/inode.c index 7a4e27606fca..46bf05d826db 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2465,18 +2465,12 @@ EXPORT_SYMBOL(inode_owner_or_capable); /* * Direct i/o helper functions */ -static void __inode_dio_wait(struct inode *inode) +bool inode_dio_finished(const struct inode *inode) { - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); - - do { - prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); - if (atomic_read(&inode->i_dio_count)) - schedule(); - } while (atomic_read(&inode->i_dio_count)); - finish_wait(wq, &q.wq_entry); + smp_mb__before_atomic(); + return atomic_read(&inode->i_dio_count) == 0; } +EXPORT_SYMBOL(inode_dio_finished); /** * inode_dio_wait - wait for outstanding DIO requests to finish @@ -2490,11 +2484,16 @@ static void __inode_dio_wait(struct inode *inode) */ void inode_dio_wait(struct inode *inode) { - if (atomic_read(&inode->i_dio_count)) - __inode_dio_wait(inode); + wait_var_event(&inode->i_dio_count, inode_dio_finished); } EXPORT_SYMBOL(inode_dio_wait); +void inode_dio_wait_interruptible(struct inode *inode) +{ + wait_var_event_interruptible(&inode->i_dio_count, inode_dio_finished); +} +EXPORT_SYMBOL(inode_dio_wait_interruptible); + /* * inode_set_flags - atomically set some inode flags * diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c index 75dc52a49b3a..c2cfdda85230 100644 --- a/fs/netfs/locking.c +++ b/fs/netfs/locking.c @@ -21,23 +21,11 @@ */ static int inode_dio_wait_interruptible(struct inode *inode) { - if (!atomic_read(&inode->i_dio_count)) + if (inode_dio_finished(inode)) return 0; - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); - - for (;;) { - prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); - if (!atomic_read(&inode->i_dio_count)) - break; - if (signal_pending(current)) - break; - schedule(); - } - finish_wait(wq, &q.wq_entry); - - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; + inode_dio_wait_interruptible(inode); + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; } /* Call with exclusively locked inode->i_rwsem */ diff --git a/include/linux/fs.h b/include/linux/fs.h index b6f2e2a1e513..f744cd918259 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2380,8 +2380,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_REFERENCED Marks the inode as recently references on the LRU list. * - * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). - * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See @@ -2413,8 +2411,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) -#define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 13) @@ -3230,6 +3226,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, #endif void inode_dio_wait(struct inode *inode); +void inode_dio_wait_interruptible(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O requests @@ -3241,6 +3238,7 @@ void inode_dio_wait(struct inode *inode); static inline void inode_dio_begin(struct inode *inode) { atomic_inc(&inode->i_dio_count); + smp_mb__after_atomic(); } /** @@ -3252,8 +3250,9 @@ static inline void inode_dio_begin(struct inode *inode) */ static inline void inode_dio_end(struct inode *inode) { + smp_mb__before_atomic(); if (atomic_dec_and_test(&inode->i_dio_count)) - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); + wake_up_var(&inode->i_dio_count); } extern void inode_set_flags(struct inode *inode, unsigned int flags,
Afaict, we can just rely on inode->i_dio_count for waiting instead of this awkward indirection through __I_DIO_WAKEUP. This survives LTP dio and xfstests dio tests. Signed-off-by: Christian Brauner <brauner@kernel.org> --- --- fs/inode.c | 23 +++++++++++------------ fs/netfs/locking.c | 18 +++--------------- include/linux/fs.h | 9 ++++----- 3 files changed, 18 insertions(+), 32 deletions(-) --- base-commit: 5570f04d0bb1a34ebcb27caac76c797a7c9e36c9 change-id: 20240816-vfs-misc-dio-5cb07eaae155