Message ID | 20240821-work-i_state-v2-5-67244769f102@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | inode: turn i_state into u32 | expand |
On Wed, 2024-08-21 at 17:47 +0200, Christian Brauner wrote: > Port the __I_LRU_ISOLATING mechanism to use the new var event mechanism. > > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > fs/inode.c | 26 +++++++++++++++++--------- > 1 file changed, 17 insertions(+), 9 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index d18e1567c487..c8a5c63dc980 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -510,8 +510,7 @@ static void inode_unpin_lru_isolating(struct inode *inode) > spin_lock(&inode->i_lock); > WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); > inode->i_state &= ~I_LRU_ISOLATING; > - smp_mb(); > - wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); > + inode_wake_up_bit(inode, __I_LRU_ISOLATING); > spin_unlock(&inode->i_lock); > } > > @@ -519,13 +518,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) > { > lockdep_assert_held(&inode->i_lock); > if (inode->i_state & I_LRU_ISOLATING) { > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > - wait_queue_head_t *wqh; > - > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > - spin_unlock(&inode->i_lock); > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > - spin_lock(&inode->i_lock); > + struct wait_bit_queue_entry wqe; > + struct wait_queue_head *wq_head; > + > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > + for (;;) { > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > + TASK_UNINTERRUPTIBLE); > + if (inode->i_state & I_LRU_ISOLATING) { > + spin_unlock(&inode->i_lock); > + schedule(); > + spin_lock(&inode->i_lock); > + continue; > + } > + break; > + } nit: personally, I'd prefer this, since you wouldn't need the brackets or the continue: if (!(inode->i_state & LRU_ISOLATING)) break; spin_unlock(); schedule(); ... > + finish_wait(wq_head, &wqe.wq_entry); > WARN_ON(inode->i_state & I_LRU_ISOLATING); > } > } >
On Wed, Aug 21, 2024 at 03:41:45PM GMT, Jeff Layton wrote: > On Wed, 2024-08-21 at 17:47 +0200, Christian Brauner wrote: > > Port the __I_LRU_ISOLATING mechanism to use the new var event mechanism. > > > > Signed-off-by: Christian Brauner <brauner@kernel.org> > > --- > > fs/inode.c | 26 +++++++++++++++++--------- > > 1 file changed, 17 insertions(+), 9 deletions(-) > > > > diff --git a/fs/inode.c b/fs/inode.c > > index d18e1567c487..c8a5c63dc980 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -510,8 +510,7 @@ static void inode_unpin_lru_isolating(struct inode *inode) > > spin_lock(&inode->i_lock); > > WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); > > inode->i_state &= ~I_LRU_ISOLATING; > > - smp_mb(); > > - wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); > > + inode_wake_up_bit(inode, __I_LRU_ISOLATING); > > spin_unlock(&inode->i_lock); > > } > > > > @@ -519,13 +518,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) > > { > > lockdep_assert_held(&inode->i_lock); > > if (inode->i_state & I_LRU_ISOLATING) { > > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > > - wait_queue_head_t *wqh; > > - > > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > > - spin_unlock(&inode->i_lock); > > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > > - spin_lock(&inode->i_lock); > > + struct wait_bit_queue_entry wqe; > > + struct wait_queue_head *wq_head; > > + > > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > > + for (;;) { > > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > > + TASK_UNINTERRUPTIBLE); > > + if (inode->i_state & I_LRU_ISOLATING) { > > + spin_unlock(&inode->i_lock); > > + schedule(); > > + spin_lock(&inode->i_lock); > > + continue; > > + } > > + break; > > + } > > nit: personally, I'd prefer this, since you wouldn't need the brackets > or the continue: > > if (!(inode->i_state & LRU_ISOLATING)) > break; > spin_unlock(); > schedule(); Yeah, that's nicer. I'll use that.
On Thu, Aug 22, 2024 at 10:53:50AM +0200, Christian Brauner wrote: > On Wed, Aug 21, 2024 at 03:41:45PM GMT, Jeff Layton wrote: > > > if (inode->i_state & I_LRU_ISOLATING) { > > > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > > > - wait_queue_head_t *wqh; > > > - > > > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > > > - spin_unlock(&inode->i_lock); > > > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > > > - spin_lock(&inode->i_lock); > > > + struct wait_bit_queue_entry wqe; > > > + struct wait_queue_head *wq_head; > > > + > > > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > > > + for (;;) { > > > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > > > + TASK_UNINTERRUPTIBLE); > > > + if (inode->i_state & I_LRU_ISOLATING) { > > > + spin_unlock(&inode->i_lock); > > > + schedule(); > > > + spin_lock(&inode->i_lock); > > > + continue; > > > + } > > > + break; > > > + } > > > > nit: personally, I'd prefer this, since you wouldn't need the brackets > > or the continue: > > > > if (!(inode->i_state & LRU_ISOLATING)) > > break; > > spin_unlock(); > > schedule(); > > Yeah, that's nicer. I'll use that. In that case may I suggest also short-circuiting the entire func? if (!(inode->i_state & I_LRU_ISOLATING)) return; then the entire thing loses one indentation level Same thing is applicable to the I_SYNC flag. A non-cosmetic is as follows: is it legal for a wake up to happen spuriously? For legal waiters on the flag I_FREEING is set which prevents I_LRU_ISOLATING from showing up afterwards. Thus it should be sufficient to wait for the flag to clear once. This is moot if spurious wakeups do happen. If looping is needed, then this warn: WARN_ON(inode->i_state & I_LRU_ISOLATING); fails to test for anything since the loop is on that very condition (iow it needs to be whacked) The writeback code needs to loop because there are callers outside of evict().
On Thu, Aug 22, 2024 at 11:48:45AM GMT, Mateusz Guzik wrote: > On Thu, Aug 22, 2024 at 10:53:50AM +0200, Christian Brauner wrote: > > On Wed, Aug 21, 2024 at 03:41:45PM GMT, Jeff Layton wrote: > > > > if (inode->i_state & I_LRU_ISOLATING) { > > > > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > > > > - wait_queue_head_t *wqh; > > > > - > > > > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > > > > - spin_unlock(&inode->i_lock); > > > > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > > > > - spin_lock(&inode->i_lock); > > > > + struct wait_bit_queue_entry wqe; > > > > + struct wait_queue_head *wq_head; > > > > + > > > > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > > > > + for (;;) { > > > > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > > > > + TASK_UNINTERRUPTIBLE); > > > > + if (inode->i_state & I_LRU_ISOLATING) { > > > > + spin_unlock(&inode->i_lock); > > > > + schedule(); > > > > + spin_lock(&inode->i_lock); > > > > + continue; > > > > + } > > > > + break; > > > > + } > > > > > > nit: personally, I'd prefer this, since you wouldn't need the brackets > > > or the continue: > > > > > > if (!(inode->i_state & LRU_ISOLATING)) > > > break; > > > spin_unlock(); > > > schedule(); > > > > Yeah, that's nicer. I'll use that. > > In that case may I suggest also short-circuiting the entire func? > > if (!(inode->i_state & I_LRU_ISOLATING)) > return; Afaict, if prepare_to_wait_event() has been called finish_wait() must be called. > > then the entire thing loses one indentation level > > Same thing is applicable to the I_SYNC flag. > > A non-cosmetic is as follows: is it legal for a wake up to happen > spuriously? So, I simply mimicked ___wait_var_event() and __wait_on_bit() - both loop. I reasoned that ___wait_var_event() via @state and __wait_on_bit() via @mode take the task state into account to wait in. And my conclusion was that they don't loop on TASK_UNINTERRUPTIBLE but would loop on e.g., TASK_INTERRUPTIBLE. Iow, I assume that spurious wakeups shouldn't happen with TASK_UNINTERRUPTIBLE. But it's not something I'm able to assert with absolute confidence so I erred on the side of caution.
On Thu, Aug 22, 2024 at 01:10:43PM +0200, Christian Brauner wrote: > On Thu, Aug 22, 2024 at 11:48:45AM GMT, Mateusz Guzik wrote: > > On Thu, Aug 22, 2024 at 10:53:50AM +0200, Christian Brauner wrote: > > > On Wed, Aug 21, 2024 at 03:41:45PM GMT, Jeff Layton wrote: > > > > > if (inode->i_state & I_LRU_ISOLATING) { > > > > > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > > > > > - wait_queue_head_t *wqh; > > > > > - > > > > > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > > > > > - spin_unlock(&inode->i_lock); > > > > > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > > > > > - spin_lock(&inode->i_lock); > > > > > + struct wait_bit_queue_entry wqe; > > > > > + struct wait_queue_head *wq_head; > > > > > + > > > > > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > > > > > + for (;;) { > > > > > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > > > > > + TASK_UNINTERRUPTIBLE); > > > > > + if (inode->i_state & I_LRU_ISOLATING) { > > > > > + spin_unlock(&inode->i_lock); > > > > > + schedule(); > > > > > + spin_lock(&inode->i_lock); > > > > > + continue; > > > > > + } > > > > > + break; > > > > > + } > > > > > > > > nit: personally, I'd prefer this, since you wouldn't need the brackets > > > > or the continue: > > > > > > > > if (!(inode->i_state & LRU_ISOLATING)) > > > > break; > > > > spin_unlock(); > > > > schedule(); > > > > > > Yeah, that's nicer. I'll use that. > > > > In that case may I suggest also short-circuiting the entire func? > > > > if (!(inode->i_state & I_LRU_ISOLATING)) > > return; > > Afaict, if prepare_to_wait_event() has been called finish_wait() must be > called. > I mean the upfront check, before any other work: static void inode_wait_for_lru_isolating(struct inode *inode) { lockdep_assert_held(&inode->i_lock); if (!(inode->i_state & I_LRU_ISOLATING)) return; /* for loop goes here, losing an indentation level but otherwise * identical. same remark for the writeback thing */ } > > > > then the entire thing loses one indentation level > > > > Same thing is applicable to the I_SYNC flag. > > > > A non-cosmetic is as follows: is it legal for a wake up to happen > > spuriously? > > So, I simply mimicked ___wait_var_event() and __wait_on_bit() - both > loop. I reasoned that ___wait_var_event() via @state and __wait_on_bit() > via @mode take the task state into account to wait in. And my conclusion > was that they don't loop on TASK_UNINTERRUPTIBLE but would loop on e.g., > TASK_INTERRUPTIBLE. Iow, I assume that spurious wakeups shouldn't happen > with TASK_UNINTERRUPTIBLE. > > But it's not something I'm able to assert with absolute confidence so I > erred on the side of caution. This definitely should be sorted out that callers don't need to loop if the condition is a one off in the worst case, but I concede getting there is not *needed* at the moment, just a fixme.
On Thu, 22 Aug 2024, Christian Brauner wrote: > Port the __I_LRU_ISOLATING mechanism to use the new var event mechanism. > > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > fs/inode.c | 26 +++++++++++++++++--------- > 1 file changed, 17 insertions(+), 9 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index d18e1567c487..c8a5c63dc980 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -510,8 +510,7 @@ static void inode_unpin_lru_isolating(struct inode *inode) > spin_lock(&inode->i_lock); > WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); > inode->i_state &= ~I_LRU_ISOLATING; > - smp_mb(); > - wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); > + inode_wake_up_bit(inode, __I_LRU_ISOLATING); > spin_unlock(&inode->i_lock); > } > > @@ -519,13 +518,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) > { > lockdep_assert_held(&inode->i_lock); > if (inode->i_state & I_LRU_ISOLATING) { > - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); > - wait_queue_head_t *wqh; > - > - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); > - spin_unlock(&inode->i_lock); > - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); > - spin_lock(&inode->i_lock); > + struct wait_bit_queue_entry wqe; > + struct wait_queue_head *wq_head; > + > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); > + for (;;) { > + prepare_to_wait_event(wq_head, &wqe.wq_entry, > + TASK_UNINTERRUPTIBLE); > + if (inode->i_state & I_LRU_ISOLATING) { > + spin_unlock(&inode->i_lock); > + schedule(); > + spin_lock(&inode->i_lock); > + continue; > + } > + break; > + } > + finish_wait(wq_head, &wqe.wq_entry); I would really like to add wait_var_event_locked(variable, conditon, spinlock) so that above would be one or two lines. #define wait_var_event_locked(var, condition, lock) \ do { \ might_sleep(); \ if (condition) \ break; \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, \ 0, 0, \ spin_unlock(lock);schedule();spin_lock(lock)); \ } while(0) That can happen after you series lands though. The wake_up here don't need a memory barrier either. NeilBrown > WARN_ON(inode->i_state & I_LRU_ISOLATING); > } > } > > -- > 2.43.0 > >
On Fri, 23 Aug 2024 at 08:37, NeilBrown <neilb@suse.de> wrote: > > I would really like to add > > wait_var_event_locked(variable, condition, spinlock) > > so that above would be one or two lines. We don't even have that for the regular wait_event, but we do have wait_event_interruptible_locked wait_event_interruptible_locked_irq but they actually only work on the wait-queue lock, not on some generic "use this lock". Honestly, I like your version much better, but having two *very* different models for what "locked" means looks wrong. The wait-queue lock (our current "locked" event version) is a rather important special case, though, and takes advantage of holding the waitqueue lock by then using __add_wait_queue_entry_tail() without locking. "You are in a maze of twisty little functions, all alike". Linus
On Fri, Aug 23, 2024 at 10:24:57AM GMT, Linus Torvalds wrote: > On Fri, 23 Aug 2024 at 08:37, NeilBrown <neilb@suse.de> wrote: > > > > I would really like to add > > > > wait_var_event_locked(variable, condition, spinlock) > > > > so that above would be one or two lines. > > We don't even have that for the regular wait_event, but we do have > > wait_event_interruptible_locked > wait_event_interruptible_locked_irq > > but they actually only work on the wait-queue lock, not on some > generic "use this lock". > > Honestly, I like your version much better, but having two *very* > different models for what "locked" means looks wrong. > > The wait-queue lock (our current "locked" event version) is a rather > important special case, though, and takes advantage of holding the > waitqueue lock by then using __add_wait_queue_entry_tail() without > locking. > > "You are in a maze of twisty little functions, all alike". "You could've used a little more cowbell^wunderscores." __fput() ____fput() __wait_var_event() ___wait_var_event() https://www.youtube.com/watch?v=cVsQLlk-T0s
diff --git a/fs/inode.c b/fs/inode.c index d18e1567c487..c8a5c63dc980 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -510,8 +510,7 @@ static void inode_unpin_lru_isolating(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); inode->i_state &= ~I_LRU_ISOLATING; - smp_mb(); - wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + inode_wake_up_bit(inode, __I_LRU_ISOLATING); spin_unlock(&inode->i_lock); } @@ -519,13 +518,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) { lockdep_assert_held(&inode->i_lock); if (inode->i_state & I_LRU_ISOLATING) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); - wait_queue_head_t *wqh; - - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); - spin_unlock(&inode->i_lock); - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&inode->i_lock); + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; + + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); + for (;;) { + prepare_to_wait_event(wq_head, &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); + if (inode->i_state & I_LRU_ISOLATING) { + spin_unlock(&inode->i_lock); + schedule(); + spin_lock(&inode->i_lock); + continue; + } + break; + } + finish_wait(wq_head, &wqe.wq_entry); WARN_ON(inode->i_state & I_LRU_ISOLATING); } }
Port the __I_LRU_ISOLATING mechanism to use the new var event mechanism. Signed-off-by: Christian Brauner <brauner@kernel.org> --- fs/inode.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-)