diff mbox series

[4/4] fs/dcache: Move wakeup out of i_dir_seq write held region

Message ID 20220613140712.77932-5-bigeasy@linutronix.de (mailing list archive)
State New, archived
Headers show
Series [1/4] fs/dcache: Disable preemption on i_dir_seq write side on PREEMPT_RT | expand

Commit Message

Sebastian Siewior June 13, 2022, 2:07 p.m. UTC
__d_add() and __d_move() wake up waiters on dentry::d_wait from within the
i_dir_seq write held region.  This violates the PREEMPT_RT constraints as
the wake up acquires wait_queue_head::lock which is a "sleeping" spinlock
on RT.

There is no requirement to do so. __d_lookup_unhash() has cleared
DCACHE_PAR_LOOKUP and dentry::d_wait and returned the now unreachable wait
queue head pointer to the caller, so the actual wake up can be postponed
until the i_dir_seq write side critical section is left. The only
requirement is that dentry::lock is held across the whole sequence
including the wake up.

This is safe because:

  1) The whole sequence including the wake up is protected by dentry::lock.

  2) The waitqueue head is allocated by the caller on stack and can't go
     away until the whole callchain completes.

  3) If a queued waiter is woken by a spurious wake up, then it is blocked
     on dentry::lock before it can observe DCACHE_PAR_LOOKUP cleared and
     return from d_wait_lookup().

     As the wake up is inside the dentry::lock held region it's guaranteed
     that the waiter's waitq is dequeued from the waitqueue head before the
     waiter returns.

     Moving the wake up past the unlock of dentry::lock would allow the
     waiter to return with the on stack waitq still enqueued due to a
     spurious wake up.

  4) New waiters have to acquire dentry::lock before checking whether the
     DCACHE_PAR_LOOKUP flag is set.

Move the wake up past end_dir_add() which leaves the i_dir_seq write side
critical section and enables preemption.

For non RT kernels there is no difference because preemption is still
disabled due to dentry::lock being held, but it shortens the time between
wake up and unlocking dentry::lock, which reduces the contention for the
woken up waiter.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/dcache.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/fs/dcache.c b/fs/dcache.c
index 6ef1f5c32bc0f..0b5fd3a17ff7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2747,13 +2747,15 @@  EXPORT_SYMBOL(__d_lookup_done);
 
 static inline void __d_add(struct dentry *dentry, struct inode *inode)
 {
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
+
 	spin_lock(&dentry->d_lock);
 	if (unlikely(d_in_lookup(dentry))) {
 		dir = dentry->d_parent->d_inode;
 		n = start_dir_add(dir);
-		wake_up_all(__d_lookup_unhash(dentry));
+		d_wait = __d_lookup_unhash(dentry);
 	}
 	if (inode) {
 		unsigned add_flags = d_flags_for_inode(inode);
@@ -2764,8 +2766,10 @@  static inline void __d_add(struct dentry *dentry, struct inode *inode)
 		fsnotify_update_flags(dentry);
 	}
 	__d_rehash(dentry);
-	if (dir)
+	if (dir) {
 		end_dir_add(dir, n);
+		wake_up_all(d_wait);
+	}
 	spin_unlock(&dentry->d_lock);
 	if (inode)
 		spin_unlock(&inode->i_lock);
@@ -2912,6 +2916,7 @@  static void __d_move(struct dentry *dentry, struct dentry *target,
 		     bool exchange)
 {
 	struct dentry *old_parent, *p;
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
 
@@ -2942,7 +2947,7 @@  static void __d_move(struct dentry *dentry, struct dentry *target,
 	if (unlikely(d_in_lookup(target))) {
 		dir = target->d_parent->d_inode;
 		n = start_dir_add(dir);
-		wake_up_all(__d_lookup_unhash(target));
+		d_wait = __d_lookup_unhash(target);
 	}
 
 	write_seqcount_begin(&dentry->d_seq);
@@ -2977,8 +2982,10 @@  static void __d_move(struct dentry *dentry, struct dentry *target,
 	write_seqcount_end(&target->d_seq);
 	write_seqcount_end(&dentry->d_seq);
 
-	if (dir)
+	if (dir) {
 		end_dir_add(dir, n);
+		wake_up_all(d_wait);
+	}
 
 	if (dentry->d_parent != old_parent)
 		spin_unlock(&dentry->d_parent->d_lock);