
[03/11] locking, rwsem: introduce basis for down_write_killable

Message ID 1459508695-14915-4-git-send-email-mhocko@kernel.org (mailing list archive)
State New, archived

Commit Message

Michal Hocko April 1, 2016, 11:04 a.m. UTC
From: Michal Hocko <mhocko@suse.com>

Introduce a generic implementation necessary for down_write_killable.
It is a trivial extension of the already existing down_write call,
which it makes interruptible by SIGKILL.  This patch doesn't provide
down_write_killable yet because architectures have to provide the
necessary pieces first.

rwsem_down_write_failed, which is the generic slow path for the
write lock, is extended to take a task state and renamed to
__rwsem_down_write_failed_state. The return value is either a valid
semaphore pointer or ERR_PTR(-EINTR).

rwsem_down_write_failed_killable is exported as a new way to wait for
the lock and be killable.

For the rwsem-spinlock implementation the current __down_write is
updated in a similar way as __rwsem_down_write_failed_state, except it
doesn't need new exports, just a visible __down_write_killable.

Architectures which are not using the generic rwsem implementation are
supposed to provide their own __down_write_killable implementation and
use rwsem_down_write_failed_killable for the slow path.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 include/asm-generic/rwsem.h     | 12 ++++++++++++
 include/linux/rwsem-spinlock.h  |  1 +
 include/linux/rwsem.h           |  2 ++
 kernel/locking/rwsem-spinlock.c | 22 ++++++++++++++++++++--
 kernel/locking/rwsem-xadd.c     | 31 +++++++++++++++++++++++++------
 5 files changed, 60 insertions(+), 8 deletions(-)
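
[Editorial note: for context, a minimal sketch of how a caller is
expected to use the killable variant once a later patch in this series
wires up the down_write_killable() wrapper; the call site below is
hypothetical.]

	/*
	 * Hypothetical caller.  down_write_killable() itself is only
	 * added later in this series; it returns -EINTR when the
	 * waiting task is killed and 0 once the lock is acquired.
	 */
	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;
	/* ... write-side critical section ... */
	up_write(&mm->mmap_sem);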

Comments

Davidlohr Bueso April 2, 2016, 4:41 a.m. UTC | #1
On Fri, 01 Apr 2016, Michal Hocko wrote:

>From: Michal Hocko <mhocko@suse.com>
>
>Introduce a generic implementation necessary for down_write_killable.
>It is a trivial extension of the already existing down_write call,
>which it makes interruptible by SIGKILL.  This patch doesn't provide
>down_write_killable yet because architectures have to provide the
>necessary pieces first.
>
>rwsem_down_write_failed, which is the generic slow path for the
>write lock, is extended to take a task state and renamed to
>__rwsem_down_write_failed_state. The return value is either a valid
>semaphore pointer or ERR_PTR(-EINTR).
>
>rwsem_down_write_failed_killable is exported as a new way to wait for
>the lock and be killable.
>
>For the rwsem-spinlock implementation the current __down_write is
>updated in a similar way as __rwsem_down_write_failed_state, except it
>doesn't need new exports, just a visible __down_write_killable.
>
>Architectures which are not using the generic rwsem implementation are
>supposed to provide their own __down_write_killable implementation and
>use rwsem_down_write_failed_killable for the slow path.

So in a nutshell, this is supposed to be the (writer) rwsem counterpart of
mutex_lock_killable() and down_killable(), right?

[...]

>--- a/kernel/locking/rwsem-xadd.c
>+++ b/kernel/locking/rwsem-xadd.c
>@@ -433,12 +433,13 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
> /*
>  * Wait until we successfully acquire the write lock
>  */
>-__visible
>-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
>+static inline struct rw_semaphore *
>+__rwsem_down_write_failed_state(struct rw_semaphore *sem, int state)

fwiw I'm not a fan of the _state naming. While I understand why you chose it, I feel
it does not really describe the purpose of the call itself. The state logic alone is
really quite small and therefore should not govern the function name. Why not just apply
KISS and label things _common, i.e. like mutexes do? This would also standardize names a
bit.

> {
> 	long count;
> 	bool waiting = true; /* any queued threads before us */
> 	struct rwsem_waiter waiter;
>+	struct rw_semaphore *ret = sem;
>
> 	/* undo write bias from down_write operation, stop active locking */
> 	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
>@@ -478,7 +479,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
> 		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
>
> 	/* wait until we successfully acquire the lock */
>-	set_current_state(TASK_UNINTERRUPTIBLE);
>+	set_current_state(state);
> 	while (true) {
> 		if (rwsem_try_write_lock(count, sem))
> 			break;
>@@ -486,21 +487,39 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
>
> 		/* Block until there are no active lockers. */
> 		do {
>+			if (signal_pending_state(state, current)) {

                           ^^ unlikely()?

>+				raw_spin_lock_irq(&sem->wait_lock);

If the lock is highly contended + a bad workload for spin-on-owner, this could take a while :)
Of course, this is a side effect of the wait until no active lockers optimization which avoids
the wait_lock in the first place, so fortunately it somewhat mitigates the situation.

>+				ret = ERR_PTR(-EINTR);
>+				goto out;
>+			}
> 			schedule();
>-			set_current_state(TASK_UNINTERRUPTIBLE);
>+			set_current_state(state);
> 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
>
> 		raw_spin_lock_irq(&sem->wait_lock);
> 	}
>+out:
> 	__set_current_state(TASK_RUNNING);
>-

You certainly don't want this if exiting due to the TASK_KILLABLE situation.

> 	list_del(&waiter.list);
> 	raw_spin_unlock_irq(&sem->wait_lock);
>
>-	return sem;
>+	return ret;
>+}

Thanks,
Davidlohr
Michal Hocko April 4, 2016, 9:17 a.m. UTC | #2
On Fri 01-04-16 21:41:25, Davidlohr Bueso wrote:
> On Fri, 01 Apr 2016, Michal Hocko wrote:
> 
> >From: Michal Hocko <mhocko@suse.com>
> >
> >Introduce a generic implementation necessary for down_write_killable.
> >It is a trivial extension of the already existing down_write call,
> >which it makes interruptible by SIGKILL.  This patch doesn't provide
> >down_write_killable yet because architectures have to provide the
> >necessary pieces first.
> >
> >rwsem_down_write_failed, which is the generic slow path for the
> >write lock, is extended to take a task state and renamed to
> >__rwsem_down_write_failed_state. The return value is either a valid
> >semaphore pointer or ERR_PTR(-EINTR).
> >
> >rwsem_down_write_failed_killable is exported as a new way to wait for
> >the lock and be killable.
> >
> >For the rwsem-spinlock implementation the current __down_write is
> >updated in a similar way as __rwsem_down_write_failed_state, except it
> >doesn't need new exports, just a visible __down_write_killable.
> >
> >Architectures which are not using the generic rwsem implementation are
> >supposed to provide their own __down_write_killable implementation and
> >use rwsem_down_write_failed_killable for the slow path.
> 
> So in a nutshell, this is supposed to be the (writer) rwsem counterpart of
> mutex_lock_killable() and down_killable(), right?

Yes.
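
[Editorial note: roughly, assuming the down_write_killable() wrapper
that a later patch in this series adds on top of these helpers, the
intended parallel is:]

	/* existing killable mutex lock */
	if (mutex_lock_killable(&lock))
		return -EINTR;

	/* new killable write lock on an rwsem, same contract */
	if (down_write_killable(&sem))
		return -EINTR;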

> [...]
> 
> >--- a/kernel/locking/rwsem-xadd.c
> >+++ b/kernel/locking/rwsem-xadd.c
> >@@ -433,12 +433,13 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
> >/*
> > * Wait until we successfully acquire the write lock
> > */
> >-__visible
> >-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
> >+static inline struct rw_semaphore *
> >+__rwsem_down_write_failed_state(struct rw_semaphore *sem, int state)
> 
> fwiw I'm not a fan of the _state naming. While I understand why you chose it, I feel
> it does not really describe the purpose of the call itself. The state logic alone is
> really quite small and therefore should not govern the function name. Why not just apply
> KISS and label things _common, i.e. like mutexes do? This would also standardize names a
> bit.

I really do not care much about naming. So if _common sounds better I
can certainly rename.

> 
> >{
> >	long count;
> >	bool waiting = true; /* any queued threads before us */
> >	struct rwsem_waiter waiter;
> >+	struct rw_semaphore *ret = sem;
> >
> >	/* undo write bias from down_write operation, stop active locking */
> >	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
> >@@ -478,7 +479,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
> >		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
> >
> >	/* wait until we successfully acquire the lock */
> >-	set_current_state(TASK_UNINTERRUPTIBLE);
> >+	set_current_state(state);
> >	while (true) {
> >		if (rwsem_try_write_lock(count, sem))
> >			break;
> >@@ -486,21 +487,39 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
> >
> >		/* Block until there are no active lockers. */
> >		do {
> >+			if (signal_pending_state(state, current)) {
> 
>                           ^^ unlikely()?

The generated code is identical after I've added unlikely. I haven't
tried more gcc versions (mine is 5.3.1) but is this worth it?

> 
> >+				raw_spin_lock_irq(&sem->wait_lock);
> 
> If the lock is highly contended + a bad workload for spin-on-owner, this could take a while :)
> Of course, this is a side effect of the wait until no active lockers optimization which avoids
> the wait_lock in the first place, so fortunately it somewhat mitigates the situation.
> 
> >+				ret = ERR_PTR(-EINTR);
> >+				goto out;
> >+			}
> >			schedule();
> >-			set_current_state(TASK_UNINTERRUPTIBLE);
> >+			set_current_state(state);
> >		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
> >
> >		raw_spin_lock_irq(&sem->wait_lock);
> >	}
> >+out:
> >	__set_current_state(TASK_RUNNING);
> >-
> 
> You certainly don't want this if exiting due to the TASK_KILLABLE situation.

Not sure I got your point here.
Peter Zijlstra April 4, 2016, 9:21 a.m. UTC | #3
On Mon, Apr 04, 2016 at 11:17:00AM +0200, Michal Hocko wrote:
> > >@@ -486,21 +487,39 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
> > >
> > >		/* Block until there are no active lockers. */
> > >		do {
> > >+			if (signal_pending_state(state, current)) {
> > 
> >                           ^^ unlikely()?
> 
> The generated code is identical after I've added unlikely. I haven't
> tried more gcc versions (mine is 5.3.1) but is this worth it?

Both signal_pending() and __fatal_signal_pending() already have an
unlikely(), which is why adding it here is superfluous.
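
[Editorial note: for reference, the helpers in question looked roughly
like this at the time; paraphrased from include/linux/sched.h, with the
unlikely() already on the innermost test.]

	static inline int signal_pending(struct task_struct *p)
	{
		return unlikely(test_tsk_thread_flag(p, TIF_SIGPENDING));
	}

	static inline int signal_pending_state(long state, struct task_struct *p)
	{
		/* TASK_KILLABLE includes TASK_WAKEKILL, so killable waits pass */
		if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
			return 0;
		if (!signal_pending(p))
			return 0;

		return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
	}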
Davidlohr Bueso April 7, 2016, 6:58 a.m. UTC | #4
On Mon, 04 Apr 2016, Michal Hocko wrote:

>Not sure I got your point here.

You set current to TASK_KILLABLE in the sleep loop; why do you want to change
it here to TASK_RUNNING if it's about to be killed? At least in the case of
UNINTERRUPTIBLE we do it merely as a redundancy after breaking out of the
loop. Of course we also acquired the lock in the first place by that time and
we _better_ be running.

Thanks,
Davidlohr
Michal Hocko April 7, 2016, 7:38 a.m. UTC | #5
On Wed 06-04-16 23:58:07, Davidlohr Bueso wrote:
> On Mon, 04 Apr 2016, Michal Hocko wrote:
> 
> >Not sure I got your point here.
> 
> You set current to TASK_KILLABLE in the sleep loop; why do you want to change
> it here to TASK_RUNNING if it's about to be killed?

Wouldn't it be unexpected to return from a lock with something other than
TASK_RUNNING?

> At least in the case of
> UNINTERRUPTIBLE we do it merely as a redundancy after breaking out of the
> loop. Of course we also acquired the lock in the first place by that time and
> we _better_ be running.

I guess the reason was that rwsem_try_write_lock might succeed and we do
not want to return with TASK_UNINTERRUPTIBLE in that case.
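
[Editorial note: to make this concrete, here is the loop from the patch
below, condensed to its control flow. Both the success exit (break) and
the signal exit (goto out) leave the task in `state`, so the
TASK_RUNNING reset on the common exit path covers both.]

	set_current_state(state);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;			/* lock acquired, still in `state` */
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			if (signal_pending_state(state, current)) {
				raw_spin_lock_irq(&sem->wait_lock);
				ret = ERR_PTR(-EINTR);
				goto out;	/* killed, also still in `state` */
			}
			schedule();
			set_current_state(state);
		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
out:
	__set_current_state(TASK_RUNNING);	/* needed on both exits */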

Patch

diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index b8d8a6cf4ca8..3fc94a046bf5 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -63,6 +63,18 @@  static inline void __down_write(struct rw_semaphore *sem)
 		rwsem_down_write_failed(sem);
 }
 
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	long tmp;
+
+	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+				     (atomic_long_t *)&sem->count);
+	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+	return 0;
+}
+
 static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
 	long tmp;
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index a733a5467e6c..ae0528b834cd 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -34,6 +34,7 @@  struct rw_semaphore {
 extern void __down_read(struct rw_semaphore *sem);
 extern int __down_read_trylock(struct rw_semaphore *sem);
 extern void __down_write(struct rw_semaphore *sem);
+extern int __must_check __down_write_killable(struct rw_semaphore *sem);
 extern int __down_write_trylock(struct rw_semaphore *sem);
 extern void __up_read(struct rw_semaphore *sem);
 extern void __up_write(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 8f498cdde280..7d7ae029dac5 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -14,6 +14,7 @@ 
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
+#include <linux/err.h>
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 #include <linux/osq_lock.h>
 #endif
@@ -43,6 +44,7 @@  struct rw_semaphore {
 
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
 extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
 
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index bab26104a5d0..fb2db7b408f0 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -191,11 +191,12 @@  int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * get a write lock on the semaphore
  */
-void __sched __down_write(struct rw_semaphore *sem)
+int __sched __down_write_state(struct rw_semaphore *sem, int state)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
 	unsigned long flags;
+	int ret = 0;
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
@@ -215,16 +216,33 @@  void __sched __down_write(struct rw_semaphore *sem)
 		 */
 		if (sem->count == 0)
 			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (signal_pending_state(state, current)) {
+			ret = -EINTR;
+			goto out;
+		}
+		set_task_state(tsk, state);
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		schedule();
 		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
 	/* got the lock */
 	sem->count = -1;
+out:
 	list_del(&waiter.list);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	return ret;
+}
+
+void __sched __down_write(struct rw_semaphore *sem)
+{
+	__down_write_state(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+	return __down_write_state(sem, TASK_KILLABLE);
 }
 
 /*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de05b2d1..781b2628e41b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -433,12 +433,13 @@  static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 /*
  * Wait until we successfully acquire the write lock
  */
-__visible
-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore *
+__rwsem_down_write_failed_state(struct rw_semaphore *sem, int state)
 {
 	long count;
 	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
+	struct rw_semaphore *ret = sem;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -478,7 +479,7 @@  struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	set_current_state(state);
 	while (true) {
 		if (rwsem_try_write_lock(count, sem))
 			break;
@@ -486,21 +487,39 @@  struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
 		/* Block until there are no active lockers. */
 		do {
+			if (signal_pending_state(state, current)) {
+				raw_spin_lock_irq(&sem->wait_lock);
+				ret = ERR_PTR(-EINTR);
+				goto out;
+			}
 			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			set_current_state(state);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+out:
 	__set_current_state(TASK_RUNNING);
-
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
 
-	return sem;
+	return ret;
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_state(sem, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(rwsem_down_write_failed);
 
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed_killable(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_state(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_write_failed_killable);
+
 /*
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here