@@ -971,6 +971,16 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be
initialized after each and every call to kmem_cache_alloc(), which renders
reference-free spinlock acquisition completely unsafe. Therefore, when
using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter.
+If using refcount_t, the specialized refcount_{add|inc}_not_zero_acquire()
+and refcount_set_release() APIs should be used to ensure correct operation
+ordering when verifying object identity and when initializing newly
+allocated objects. Acquire fence in refcount_{add|inc}_not_zero_acquire()
+ensures that identity checks happen *after* reference count is taken.
+refcount_set_release() should be called after a newly allocated object is
+fully initialized and release fence ensures that new values are visible
+*before* refcount can be successfully taken by other users. Once
+refcount_set_release() is called, the object should be considered visible
+by other tasks.
(Those willing to initialize their locks in a kmem_cache constructor
may also use locking, including cache-friendly sequence locking.)
@@ -86,7 +86,19 @@ Memory ordering guarantee changes:
* none (both fully unordered)
-case 2) - increment-based ops that return no value
+case 2) - non-"Read/Modify/Write" (RMW) ops with release ordering
+-------------------------------------------
+
+Function changes:
+
+ * atomic_set_release() --> refcount_set_release()
+
+Memory ordering guarantee changes:
+
+ * none (both provide RELEASE ordering)
+
+
+case 3) - increment-based ops that return no value
--------------------------------------------------
Function changes:
@@ -98,7 +110,7 @@ Memory ordering guarantee changes:
* none (both fully unordered)
-case 3) - decrement-based RMW ops that return no value
+case 4) - decrement-based RMW ops that return no value
------------------------------------------------------
Function changes:
@@ -110,7 +122,7 @@ Memory ordering guarantee changes:
* fully unordered --> RELEASE ordering
-case 4) - increment-based RMW ops that return a value
+case 5) - increment-based RMW ops that return a value
-----------------------------------------------------
Function changes:
@@ -126,7 +138,20 @@ Memory ordering guarantees changes:
result of obtaining pointer to the object!
-case 5) - generic dec/sub decrement-based RMW ops that return a value
+case 6) - increment-based RMW ops with acquire ordering that return a value
+-----------------------------------------------------
+
+Function changes:
+
+ * atomic_inc_not_zero() --> refcount_inc_not_zero_acquire()
+ * no atomic counterpart --> refcount_add_not_zero_acquire()
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> ACQUIRE ordering on success
+
+
+case 7) - generic dec/sub decrement-based RMW ops that return a value
---------------------------------------------------------------------
Function changes:
@@ -139,7 +164,7 @@ Memory ordering guarantees changes:
* fully ordered --> RELEASE ordering + ACQUIRE ordering on success
-case 6) other decrement-based RMW ops that return a value
+case 8) other decrement-based RMW ops that return a value
---------------------------------------------------------
Function changes:
@@ -154,7 +179,7 @@ Memory ordering guarantees changes:
.. note:: atomic_add_unless() only provides full order on success.
-case 7) - lock-based RMW
+case 9) - lock-based RMW
------------------------
Function changes:
@@ -87,6 +87,15 @@
* The decrements dec_and_test() and sub_and_test() also provide acquire
* ordering on success.
*
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide
+ * acquire and release ordering for cases when the memory occupied by the
+ * object might be reused to store another object. This is important for the
+ * cases where secondary validation is required to detect such reuse, e.g.
+ * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after
+ * the refcount is taken, hence acquire order is necessary. Similarly, when the
+ * object is initialized, all stores to its attributes should be visible before
+ * the refcount is set, otherwise a stale attribute value might be used by
+ * another task which succeeds in taking a refcount to the new object.
*/
#ifndef _LINUX_REFCOUNT_H
@@ -125,6 +134,31 @@ static inline void refcount_set(refcount_t *r, int n)
atomic_set(&r->refs, n);
}
+/**
+ * refcount_set_release - set a refcount's value with release ordering
+ * @r: the refcount
+ * @n: value to which the refcount will be set
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides release memory ordering which will order previous memory operations
+ * against this store. This ensures all updates to this object are visible
+ * once the refcount is set and stale values from the object previously
+ * occupying this memory are overwritten with new ones.
+ *
+ * This function should be called only after new object is fully initialized.
+ * After this call the object should be considered visible to other tasks even
+ * if it was not yet added into an object collection normally used to discover
+ * it. This is because other tasks might have discovered the object previously
+ * occupying the same memory and after memory reuse they can succeed in taking
+ * refcount to the new object and start using it.
+ */
+static inline void refcount_set_release(refcount_t *r, int n)
+{
+ atomic_set_release(&r->refs, n);
+}
+
/**
* refcount_read - get a refcount's value
* @r: the refcount
@@ -178,6 +212,52 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
return __refcount_add_not_zero(i, r, NULL);
}
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
+{
+ int old = refcount_read(r);
+
+ do {
+ if (!old)
+ break;
+ } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i));
+
+ if (oldp)
+ *oldp = old;
+
+ if (unlikely(old < 0 || old + i < 0))
+ refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
+
+ return old;
+}
+
+/**
+ * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0
+ *
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc_not_zero_acquire() should instead be used to increment a
+ * reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r)
+{
+ return __refcount_add_not_zero_acquire(i, r, NULL);
+}
+
static inline __signed_wrap
void __refcount_add(int i, refcount_t *r, int *oldp)
{
@@ -236,6 +316,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
return __refcount_inc_not_zero(r, NULL);
}
+static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp)
+{
+ return __refcount_add_not_zero_acquire(1, r, oldp);
+}
+
+/**
+ * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on
+ * success.
+ *
+ * This function should be used when memory occupied by the object might be
+ * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU.
+ *
+ * Provides acquire memory ordering on success, it is assumed the caller has
+ * guaranteed the object memory to be stable (RCU, etc.). It does provide a
+ * control dependency and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
+static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r)
+{
+ return __refcount_inc_not_zero_acquire(r, NULL);
+}
+
static inline void __refcount_inc(refcount_t *r, int *oldp)
{
__refcount_add(1, r, oldp);
@@ -136,6 +136,15 @@ enum _slab_flag_bits {
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
*
+ * Note that object identity check has to be done *after* acquiring a
+ * reference, therefore user has to ensure proper ordering for loads.
+ * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU,
+ * the newly allocated object has to be fully initialized *before* its
+ * refcount gets initialized and proper ordering for stores is required.
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are
+ * designed with the proper fences required for reference counting objects
+ * allocated with SLAB_TYPESAFE_BY_RCU.
+ *
* Note that it is not possible to acquire a lock within a structure
* allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
* as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages