Message ID | 20230501165450.15352-8-surenb@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Memory allocation profiling | expand |
Hi-- On 5/1/23 09:54, Suren Baghdasaryan wrote: > From: Kent Overstreet <kent.overstreet@linux.dev> > > This patch adds lib/lazy-percpu-counter.c, which implements counters > that start out as atomics, but lazily switch to percpu mode if the > update rate crosses some threshold (arbitrarily set at 256 per second). > from submitting-patches.rst: Describe your changes in imperative mood, e.g. "make xyzzy do frotz" instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy to do frotz", as if you are giving orders to the codebase to change its behaviour. > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > --- > include/linux/lazy-percpu-counter.h | 102 ++++++++++++++++++++++ > lib/Kconfig | 3 + > lib/Makefile | 2 + > lib/lazy-percpu-counter.c | 127 ++++++++++++++++++++++++++++ > 4 files changed, 234 insertions(+) > create mode 100644 include/linux/lazy-percpu-counter.h > create mode 100644 lib/lazy-percpu-counter.c > > diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h > new file mode 100644 > index 000000000000..45ca9e2ce58b > --- /dev/null > +++ b/include/linux/lazy-percpu-counter.h > @@ -0,0 +1,102 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Lazy percpu counters: > + * (C) 2022 Kent Overstreet > + * > + * Lazy percpu counters start out in atomic mode, then switch to percpu mode if > + * the update rate crosses some threshold. > + * > + * This means we don't have to decide between low memory overhead atomic > + * counters and higher performance percpu counters - we can have our cake and > + * eat it, too! > + * > + * Internally we use an atomic64_t, where the low bit indicates whether we're in > + * percpu mode, and the high 8 bits are a secondary counter that's incremented > + * when the counter is modified - meaning 55 bits of precision are available for > + * the counter itself. > + */ > + > +#ifndef _LINUX_LAZY_PERCPU_COUNTER_H > +#define _LINUX_LAZY_PERCPU_COUNTER_H > + > +#include <linux/atomic.h> > +#include <asm/percpu.h> > + > +struct lazy_percpu_counter { > + atomic64_t v; > + unsigned long last_wrap; > +}; > + > +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c); > +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i); > +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i); > +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c); > + > +/* > + * We use the high bits of the atomic counter for a secondary counter, which is > + * incremented every time the counter is touched. When the secondary counter > + * wraps, we check the time the counter last wrapped, and if it was recent > + * enough that means the update frequency has crossed our threshold and we > + * switch to percpu mode: > + */ > +#define COUNTER_MOD_BITS 8 > +#define COUNTER_MOD_MASK ~(~0ULL >> COUNTER_MOD_BITS) > +#define COUNTER_MOD_BITS_START (64 - COUNTER_MOD_BITS) > + > +/* > + * We use the low bit of the counter to indicate whether we're in atomic mode > + * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual > + * percpu counters: > + */ > +#define COUNTER_IS_PCPU_BIT 1 > + > +static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v) > +{ > + if (!(v & COUNTER_IS_PCPU_BIT)) > + return NULL; > + > + v ^= COUNTER_IS_PCPU_BIT; > + return (u64 __percpu *)(unsigned long)v; > +} > + > +/** > + * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter For kernel-doc, the function name should be followed by '-', not ':'. (many places) > + * > + * @c: counter to modify > + * @i: value to add > + */ > +static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i) > +{ > + u64 v = atomic64_read(&c->v); > + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); > + > + if (likely(pcpu_v)) > + this_cpu_add(*pcpu_v, i); > + else > + lazy_percpu_counter_add_slowpath(c, i); > +} > + > +/** > + * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter, > + * without upgrading to percpu mode > + * > + * @c: counter to modify > + * @i: value to add > + */ > +static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i) > +{ > + u64 v = atomic64_read(&c->v); > + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); > + > + if (likely(pcpu_v)) > + this_cpu_add(*pcpu_v, i); > + else > + lazy_percpu_counter_add_slowpath_noupgrade(c, i); > +} > + > +static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i) > +{ > + lazy_percpu_counter_add(c, -i); > +} > + > +#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */ > diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c > new file mode 100644 > index 000000000000..4f4e32c2dc09 > --- /dev/null > +++ b/lib/lazy-percpu-counter.c > @@ -0,0 +1,127 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/atomic.h> > +#include <linux/gfp.h> > +#include <linux/jiffies.h> > +#include <linux/lazy-percpu-counter.h> > +#include <linux/percpu.h> > + > +static inline s64 lazy_percpu_counter_atomic_val(s64 v) > +{ > + /* Ensure output is sign extended properly: */ > + return (v << COUNTER_MOD_BITS) >> > + (COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT); > +} > + ... > + > +/** > + * lazy_percpu_counter_exit: Free resources associated with a > + * lazy_percpu_counter Same kernel-doc comment. > + * > + * @c: counter to exit > + */ > +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c) > +{ > + free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v))); > +} > +EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit); > + > +/** > + * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter > + * > + * @c: counter to read > + */ > +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c) > +{ > + s64 v = atomic64_read(&c->v); > + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); > + > + if (pcpu_v) { > + int cpu; > + > + v = 0; > + for_each_possible_cpu(cpu) > + v += *per_cpu_ptr(pcpu_v, cpu); > + } else { > + v = lazy_percpu_counter_atomic_val(v); > + } > + > + return v; > +} > +EXPORT_SYMBOL_GPL(lazy_percpu_counter_read); > + > +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i) > +{ > + u64 atomic_i; > + u64 old, v = atomic64_read(&c->v); > + u64 __percpu *pcpu_v; > + > + atomic_i = i << COUNTER_IS_PCPU_BIT; > + atomic_i &= ~COUNTER_MOD_MASK; > + atomic_i |= 1ULL << COUNTER_MOD_BITS_START; > + > + do { > + pcpu_v = lazy_percpu_counter_is_pcpu(v); > + if (pcpu_v) { > + this_cpu_add(*pcpu_v, i); > + return; > + } > + > + old = v; > + } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old); > + > + if (unlikely(!(v & COUNTER_MOD_MASK))) { > + unsigned long now = jiffies; > + > + if (c->last_wrap && > + unlikely(time_after(c->last_wrap + HZ, now))) > + lazy_percpu_counter_switch_to_pcpu(c); > + else > + c->last_wrap = now; > + } > +} > +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath); > + > +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i) > +{ > + u64 atomic_i; > + u64 old, v = atomic64_read(&c->v); > + u64 __percpu *pcpu_v; > + > + atomic_i = i << COUNTER_IS_PCPU_BIT; > + atomic_i &= ~COUNTER_MOD_MASK; > + > + do { > + pcpu_v = lazy_percpu_counter_is_pcpu(v); > + if (pcpu_v) { > + this_cpu_add(*pcpu_v, i); > + return; > + } > + > + old = v; > + } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old); > +} > +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade); These last 2 exported functions could use some comments, preferably in kernel-doc format. Thanks.
diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h new file mode 100644 index 000000000000..45ca9e2ce58b --- /dev/null +++ b/include/linux/lazy-percpu-counter.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Lazy percpu counters: + * (C) 2022 Kent Overstreet + * + * Lazy percpu counters start out in atomic mode, then switch to percpu mode if + * the update rate crosses some threshold. + * + * This means we don't have to decide between low memory overhead atomic + * counters and higher performance percpu counters - we can have our cake and + * eat it, too! + * + * Internally we use an atomic64_t, where the low bit indicates whether we're in + * percpu mode, and the high 8 bits are a secondary counter that's incremented + * when the counter is modified - meaning 55 bits of precision are available for + * the counter itself. + */ + +#ifndef _LINUX_LAZY_PERCPU_COUNTER_H +#define _LINUX_LAZY_PERCPU_COUNTER_H + +#include <linux/atomic.h> +#include <asm/percpu.h> + +struct lazy_percpu_counter { + atomic64_t v; + unsigned long last_wrap; +}; + +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c); +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i); +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i); +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c); + +/* + * We use the high bits of the atomic counter for a secondary counter, which is + * incremented every time the counter is touched. When the secondary counter + * wraps, we check the time the counter last wrapped, and if it was recent + * enough that means the update frequency has crossed our threshold and we + * switch to percpu mode: + */ +#define COUNTER_MOD_BITS 8 +#define COUNTER_MOD_MASK ~(~0ULL >> COUNTER_MOD_BITS) +#define COUNTER_MOD_BITS_START (64 - COUNTER_MOD_BITS) + +/* + * We use the low bit of the counter to indicate whether we're in atomic mode + * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual + * percpu counters: + */ +#define COUNTER_IS_PCPU_BIT 1 + +static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v) +{ + if (!(v & COUNTER_IS_PCPU_BIT)) + return NULL; + + v ^= COUNTER_IS_PCPU_BIT; + return (u64 __percpu *)(unsigned long)v; +} + +/** + * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter + * + * @c: counter to modify + * @i: value to add + */ +static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i) +{ + u64 v = atomic64_read(&c->v); + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); + + if (likely(pcpu_v)) + this_cpu_add(*pcpu_v, i); + else + lazy_percpu_counter_add_slowpath(c, i); +} + +/** + * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter, + * without upgrading to percpu mode + * + * @c: counter to modify + * @i: value to add + */ +static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i) +{ + u64 v = atomic64_read(&c->v); + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); + + if (likely(pcpu_v)) + this_cpu_add(*pcpu_v, i); + else + lazy_percpu_counter_add_slowpath_noupgrade(c, i); +} + +static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i) +{ + lazy_percpu_counter_add(c, -i); +} + +#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 5c2da561c516..7380292a8fcd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -505,6 +505,9 @@ config ASSOCIATIVE_ARRAY for more information. +config LAZY_PERCPU_COUNTER + bool + config HAS_IOMEM bool depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index 876fcdeae34e..293a0858a3f8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -164,6 +164,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o +obj-$(CONFIG_LAZY_PERCPU_COUNTER) += lazy-percpu-counter.o + obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o obj-$(CONFIG_PACKING) += packing.o diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c new file mode 100644 index 000000000000..4f4e32c2dc09 --- /dev/null +++ b/lib/lazy-percpu-counter.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/atomic.h> +#include <linux/gfp.h> +#include <linux/jiffies.h> +#include <linux/lazy-percpu-counter.h> +#include <linux/percpu.h> + +static inline s64 lazy_percpu_counter_atomic_val(s64 v) +{ + /* Ensure output is sign extended properly: */ + return (v << COUNTER_MOD_BITS) >> + (COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT); +} + +static void lazy_percpu_counter_switch_to_pcpu(struct lazy_percpu_counter *c) +{ + u64 __percpu *pcpu_v = alloc_percpu_gfp(u64, GFP_ATOMIC|__GFP_NOWARN); + u64 old, new, v; + + if (!pcpu_v) + return; + + preempt_disable(); + v = atomic64_read(&c->v); + do { + if (lazy_percpu_counter_is_pcpu(v)) { + free_percpu(pcpu_v); + return; + } + + old = v; + new = (unsigned long)pcpu_v | 1; + + *this_cpu_ptr(pcpu_v) = lazy_percpu_counter_atomic_val(v); + } while ((v = atomic64_cmpxchg(&c->v, old, new)) != old); + preempt_enable(); +} + +/** + * lazy_percpu_counter_exit: Free resources associated with a + * lazy_percpu_counter + * + * @c: counter to exit + */ +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c) +{ + free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v))); +} +EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit); + +/** + * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter + * + * @c: counter to read + */ +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c) +{ + s64 v = atomic64_read(&c->v); + u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v); + + if (pcpu_v) { + int cpu; + + v = 0; + for_each_possible_cpu(cpu) + v += *per_cpu_ptr(pcpu_v, cpu); + } else { + v = lazy_percpu_counter_atomic_val(v); + } + + return v; +} +EXPORT_SYMBOL_GPL(lazy_percpu_counter_read); + +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i) +{ + u64 atomic_i; + u64 old, v = atomic64_read(&c->v); + u64 __percpu *pcpu_v; + + atomic_i = i << COUNTER_IS_PCPU_BIT; + atomic_i &= ~COUNTER_MOD_MASK; + atomic_i |= 1ULL << COUNTER_MOD_BITS_START; + + do { + pcpu_v = lazy_percpu_counter_is_pcpu(v); + if (pcpu_v) { + this_cpu_add(*pcpu_v, i); + return; + } + + old = v; + } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old); + + if (unlikely(!(v & COUNTER_MOD_MASK))) { + unsigned long now = jiffies; + + if (c->last_wrap && + unlikely(time_after(c->last_wrap + HZ, now))) + lazy_percpu_counter_switch_to_pcpu(c); + else + c->last_wrap = now; + } +} +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath); + +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i) +{ + u64 atomic_i; + u64 old, v = atomic64_read(&c->v); + u64 __percpu *pcpu_v; + + atomic_i = i << COUNTER_IS_PCPU_BIT; + atomic_i &= ~COUNTER_MOD_MASK; + + do { + pcpu_v = lazy_percpu_counter_is_pcpu(v); + if (pcpu_v) { + this_cpu_add(*pcpu_v, i); + return; + } + + old = v; + } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old); +} +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade);