@@ -20,10 +20,116 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/close_range.h>
+#include <linux/file_ref.h>
#include <net/sock.h>
#include "internal.h"
+/**
+ * __file_ref_get - Slowpath of file_ref_get()
+ * @ref: Pointer to the reference count
+ *
+ * Invoked when the reference count is outside of the valid zone.
+ *
+ * Return:
+ * False if the reference count was already marked dead
+ *
+ * True if the reference count is saturated, which prevents the
+ * object from being deconstructed ever.
+ */
+bool __file_ref_get(file_ref_t *ref)
+{
+ unsigned long cnt;
+
+ /*
+ * If the reference count was already marked dead, undo the
+ * increment so it stays in the middle of the dead zone and return
+ * fail.
+ */
+ cnt = atomic_long_read(&ref->refcnt);
+ if (cnt >= FILE_REF_RELEASED) {
+ atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
+ return false;
+ }
+
+ /*
+ * If it was saturated, warn and mark it so. In case the increment
+ * was already on a saturated value restore the saturation
+ * marker. This keeps it in the middle of the saturation zone and
+ * prevents the reference count from overflowing. This leaks the
+ * object memory, but prevents the obvious reference count overflow
+ * damage.
+ */
+ if (WARN_ONCE(cnt > FILE_REF_MAXREF, "leaking memory because file reference count is saturated"))
+ atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
+ return true;
+}
+EXPORT_SYMBOL_GPL(__file_ref_get);
+
+/**
+ * __file_ref_put - Slowpath of file_ref_put()
+ * @ref: Pointer to the reference count
+ *
+ * Invoked when the reference count is outside of the valid zone.
+ *
+ * Return:
+ * True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely schedule the
+ * object, which is protected by the reference counter, for
+ * deconstruction.
+ *
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * deconstruct the protected object.
+ */
+bool __file_ref_put(file_ref_t *ref)
+{
+ unsigned long cnt;
+
+ /* Did this drop the last reference? */
+ cnt = atomic_long_read(&ref->refcnt);
+ if (likely(cnt == FILE_REF_NOREF)) {
+ /*
+ * Carefully try to set the reference count to FILE_REF_DEAD.
+ *
+ * This can fail if a concurrent get() operation has
+ * elevated it again or the corresponding put() even marked
+ * it dead already. Both are valid situations and do not
+ * require a retry. If this fails the caller is not
+ * allowed to deconstruct the object.
+ */
+ if (!atomic_long_try_cmpxchg_release(&ref->refcnt, &cnt, FILE_REF_DEAD))
+ return false;
+
+ /*
+ * The caller can safely schedule the object for
+ * deconstruction. Provide acquire ordering.
+ */
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ /*
+ * If the reference count was already in the dead zone, then this
+ * put() operation is imbalanced. Warn, put the reference count back to
+ * DEAD and tell the caller to not deconstruct the object.
+ */
+ if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
+ atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
+ return false;
+ }
+
+ /*
+ * This is a put() operation on a saturated refcount. Restore the
+ * mean saturation value and tell the caller to not deconstruct the
+ * object.
+ */
+ if (cnt > FILE_REF_MAXREF)
+ atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
+ return false;
+}
+EXPORT_SYMBOL_GPL(__file_ref_put);
+
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
/* our min() is unusable in constant expressions ;-/ */
new file mode 100644
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_FILE_REF_H
+#define _LINUX_FILE_REF_H
+
+#ifdef CONFIG_64BIT
+#define FILE_REF_ONEREF 0x0000000000000000U
+#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFU
+#define FILE_REF_SATURATED 0xA000000000000000U
+#define FILE_REF_RELEASED 0xC000000000000000U
+#define FILE_REF_DEAD 0xE000000000000000U
+#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFU
+#else
+#define FILE_REF_ONEREF 0x00000000U
+#define FILE_REF_MAXREF 0x7FFFFFFFU
+#define FILE_REF_SATURATED 0xA0000000U
+#define FILE_REF_RELEASED 0xC0000000U
+#define FILE_REF_DEAD 0xE0000000U
+#define FILE_REF_NOREF 0xFFFFFFFFU
+#endif
+
+typedef struct {
+#ifdef CONFIG_64BIT
+ atomic64_t refcnt;
+#else
+ atomic_t refcnt;
+#endif
+} file_ref_t;
+
+/**
+ * file_ref_init - Initialize a file reference count
+ * @ref: Pointer to the reference count
+ * @cnt: The initial reference count typically '1'
+ */
+static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
+{
+ atomic_long_set(&ref->refcnt, cnt - 1);
+}
+
+bool __file_ref_get(file_ref_t *ref);
+bool __file_ref_put(file_ref_t *ref);
+
+/**
+ * file_ref_get - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF.
+ *
+ * Provides full memory ordering.
+ *
+ * Return: False if the attempt to acquire a reference failed. This happens
+ * when the last reference has been put already. True if a reference
+ * was successfully acquired
+ */
+static __always_inline __must_check bool file_ref_get(file_ref_t *ref)
+{
+ /*
+ * Unconditionally increase the reference count with full
+ * ordering. The saturation and dead zones provide enough
+ * tolerance for this.
+ */
+ if (likely(!atomic_long_add_negative(1, &ref->refcnt)))
+ return true;
+
+ /* Handle the cases inside the saturation and dead zones */
+ return __file_ref_get(ref);
+}
+
+/**
+ * file_ref_put -- Release a file reference
+ * @ref: Pointer to the reference count
+ *
+ * Provides release memory ordering, such that prior loads and stores
+ * are done before, and provides an acquire ordering on success such
+ * that free() must come after.
+ *
+ * Return: True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely release
+ * the object which is protected by the reference counter.
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * release the protected object.
+ */
+static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
+{
+ bool released;
+
+ preempt_disable();
+ /*
+ * Unconditionally decrease the reference count. The saturation
+ * and dead zones provide enough tolerance for this. If this
+ * fails then we need to handle the last reference drop and
+ * cases inside the saturation and dead zones.
+ */
+ if (likely(!atomic_long_add_negative_release(-1, &ref->refcnt)))
+ released = false;
+ else
+ released = __file_ref_put(ref);
+ preempt_enable();
+ return released;
+}
+
+/**
+ * file_ref_read - Read the number of file references
+ * @ref: Pointer to the reference count
+ *
+ * Return: The number of held references (0 ... N)
+ */
+static inline unsigned long file_ref_read(file_ref_t *ref)
+{
+ unsigned long c = atomic_long_read(&ref->refcnt);
+
+ /* Return 0 if within the DEAD zone. */
+ return c >= FILE_REF_RELEASED ? 0 : c + 1;
+}
+
+#endif