@@ -105,24 +105,33 @@ static inline struct mcs_spinlock *decod
* By using the whole 2nd least significant byte for the pending bit, we
* can allow better optimization of the lock acquisition for the pending
* bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
*/
-#if _Q_PENDING_BITS == 8
-
struct __qspinlock {
union {
atomic_t val;
- struct {
#ifdef __LITTLE_ENDIAN
+ u8 locked; /* overlays byte 0 of val, the least significant byte on little-endian */
+ struct {
u16 locked_pending;
u16 tail;
+ };
#else
+ struct {
u16 tail;
u16 locked_pending;
-#endif
};
+ struct {
+ u8 reserved[3]; /* skips bytes 0-2 so locked lands on byte 3, the least significant byte on big-endian */
+ u8 locked;
+ };
+#endif
};
};
+#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queue spinlock structure
@@ -209,6 +218,19 @@ static __always_inline u32 xchg_tail(str
#endif /* _Q_PENDING_BITS == 8 */
/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queue spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock; /* reinterpret the lock through the byte-addressable overlay */
+
+ WRITE_ONCE(l->locked, _Q_LOCKED_VAL); /* plain byte store: only the u8 locked field is written */
+}
+
+/**
* queue_spin_lock_slowpath - acquire the queue spinlock
* @lock: Pointer to queue spinlock structure
* @val: Current value of the queue spinlock 32-bit word
@@ -343,8 +365,13 @@ void queue_spin_lock_slowpath(struct qsp
* go away.
*
* *,x,y -> *,0,0
+ *
+ * This wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
*/
- while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
+ while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
cpu_relax();
/*
@@ -352,15 +379,19 @@ void queue_spin_lock_slowpath(struct qsp
*
* n,0,0 -> 0,0,1 : lock, uncontended
* *,0,0 -> *,0,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail),
+ * clear the tail code and grab the lock. Otherwise, we only need
+ * to grab the lock.
*/
for (;;) {
- new = _Q_LOCKED_VAL;
- if (val != tail)
- new |= val;
-
- old = atomic_cmpxchg(&lock->val, val, new);
- if (old == val)
+ if (val != tail) {
+ set_locked(lock);
break;
+ }
+ old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ if (old == val)
+ goto release; /* No contention */
val = old;
}
@@ -368,12 +399,10 @@ void queue_spin_lock_slowpath(struct qsp
/*
* contended path; wait for next, release.
*/
- if (new != _Q_LOCKED_VAL) {
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
- arch_mcs_spin_unlock_contended(&next->locked);
- }
+ arch_mcs_spin_unlock_contended(&next->locked);
release:
/*