new file mode 100644
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HISTOGRAM_H
+#define _LINUX_HISTOGRAM_H
+
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+/*
+ * Histograms for counting events represented by u64s.
+ *
+ * Each histogram may have any number of thresholds. A histogram with
+ * nr_thresholds thresholds has nr_thresholds buckets, where bucket[i] counts
+ * samples in the half-open interval ( thresholds[i-1], thresholds[i] ].
+ * (thresholds[-1] is implicitly defined as 0, and thresholds[nr_thresholds-1]
+ * must be 2**64-1.) Each histogram's thresholds are immutable after creation.
+ * Each bucket counts up to a u64 worth of events.
+ *
+ * Example usage:
+ *
+ * u64 thresholds[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds)];
+ * struct histogram *hist = histogram_alloc(thresholds, ARRAY_SIZE(thresholds));
+ * if (IS_ERR(hist)) {...}
+ * histogram_record(hist, 0, 1);
+ * histogram_record(hist, 8, 2);
+ * histogram_record(hist, 20, 3);
+ * histogram_record(hist, 110, 4);
+ * histogram_read_buckets(hist, buckets);
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * histogram_free(hist);
+ *
+ * For convenience, a struct histogram_rcu type is also provided which wraps an
+ * RCU-sched protected pointer to a struct histogram, allowing thresholds to be
+ * modified even while other threads may be recording to the histogram.
+ * Functions that operate on a histogram_rcu are distinguished by an _rcu
+ * suffix.
+ *
+ * Example usage:
+ *
+ * u64 thresholds_1[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 thresholds_2[] = {100, 200, 500, 1000, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds_1)];
+ * struct histogram_rcu hrcu;
+ * if (histogram_init_rcu(&hrcu, thresholds_1, ARRAY_SIZE(thresholds_1))) {...}
+ * histogram_record_rcu(&hrcu, 0, 1);
+ * histogram_record_rcu(&hrcu, 8, 2);
+ * histogram_record_rcu(&hrcu, 20, 3);
+ * histogram_record_rcu(&hrcu, 110, 4);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ * ARRAY_SIZE(thresholds_1)) < 0) {...}
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * if (histogram_set_thresholds_rcu(&hrcu, thresholds_2,
+ *                                  ARRAY_SIZE(thresholds_2))) {...}
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ * ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {0, 0, 0, 0, 0}
+ * histogram_record_rcu(&hrcu, 50, 1);
+ * histogram_record_rcu(&hrcu, 150, 2);
+ * histogram_record_rcu(&hrcu, 5000, 3);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ * ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {1, 2, 0, 0, 3}
+ * histogram_destroy_rcu(&hrcu);
+ */
+
+/*
+ * A histogram: per-CPU bucket counters plus an immutable, strictly
+ * ascending thresholds array terminated by ~(u64)0.  bucket[i] counts
+ * samples in (thresholds[i-1], thresholds[i]], with thresholds[-1] == 0.
+ */
+struct histogram {
+	struct rcu_head rcu;		/* for deferred free via call_rcu() */
+	u64 __percpu *buckets;		/* nr_thresholds counters per CPU */
+	size_t nr_thresholds;		/* also the number of buckets */
+	u64 thresholds[];		/* C99 flexible array member; the
+					 * deprecated [0] GNU extension is
+					 * not a true FAM */
+};
+
+/*
+ * RCU-sched protected wrapper around a struct histogram pointer, so the
+ * thresholds can be swapped out while other threads record concurrently.
+ */
+struct histogram_rcu {
+	/* NULL after histogram_destroy_rcu() (readers then no-op) */
+	struct histogram __rcu *hist;
+};
+
+/**
+ * histogram_record() - record samples in histogram
+ * @hist: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_record() may be called from tracing code.
+ */
+void histogram_record(struct histogram *hist, u64 val, u64 count);
+
+/**
+ * histogram_read_buckets() - read histogram buckets
+ * @hist: histogram
+ * @buckets: array with space for at least hist->nr_thresholds elements
+ *
+ * Sets each element of @buckets to the value of the corresponding histogram
+ * bucket.
+ *
+ * histogram_read_buckets() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_read_buckets() does not block recording, so values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ */
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets);
+
+/**
+ * histogram_alloc() - create a histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * Histogram buckets are initialized to zero. @thresholds must be sorted in
+ * ascending order and must not contain any duplicates. @nr_thresholds must be
+ * >= 1. thresholds[nr_thresholds-1] must be ~(u64)0.
+ *
+ * histogram_alloc() makes a copy of @thresholds if successful, so ownership of
+ * @thresholds is unaffected by histogram_alloc().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Pointer to new histogram, or a PTR_ERR on failure.
+ */
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_free() - delete a histogram
+ * @hist: histogram
+ */
+void histogram_free(struct histogram *hist);
+
+/**
+ * histogram_record_rcu() - record samples in RCU-protected histogram
+ * @hrcu: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_record_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on is a no-op.
+ *
+ * histogram_record_rcu() may be called from tracing code.
+ */
+static inline void histogram_record_rcu(struct histogram_rcu *hrcu, u64 val,
+					u64 count)
+{
+	struct histogram *hist;
+
+	/* _notrace variants: this may be called from tracing code */
+	rcu_read_lock_sched_notrace();
+	hist = rcu_dereference_sched(hrcu->hist);
+	/* NULL after histogram_destroy_rcu(): silently drop the record */
+	if (likely(hist))
+		histogram_record(hist, val, count);
+	rcu_read_unlock_sched_notrace();
+}
+
+/**
+ * histogram_read_rcu() - read RCU-protected histogram
+ * @hrcu: histogram
+ * @buckets: array with space for at least nr_thresholds elements
+ * @thresholds: array with space for at least nr_thresholds elements
+ * @nr_thresholds: array size (see above)
+ *
+ * If @buckets is not NULL, sets each element of @buckets to the value of the
+ * corresponding histogram bucket.
+ *
+ * If @thresholds is not NULL, sets each element of @thresholds to the
+ * corresponding histogram bucket threshold.
+ *
+ * On failure (for example, if nr_thresholds < hrcu->hist->nr_thresholds),
+ * neither @buckets nor @thresholds will be modified.
+ *
+ * histogram_read_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_read_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on returns 0.
+ *
+ * histogram_read_rcu() does not block recording, so bucket values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ *
+ * Returns: If successful, returns the actual number of thresholds stored in
+ * @thresholds; if nr_thresholds is too small, returns the negative of the
+ * minimum required nr_thresholds to succeed; if the histogram has been
+ * destroyed by histogram_destroy_rcu(), returns 0. (Note that if nr_thresholds
+ * is too small, it is not guaranteed that calling histogram_read_rcu() again
+ * with the returned value of nr_thresholds will succeed, because another
+ * thread could raise the number of thresholds again in the interim.)
+ */
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+ u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_set_thresholds_rcu() - set RCU-protected histogram thresholds
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * The semantics that apply to @thresholds are the same as for histogram_alloc.
+ *
+ * If successful, all buckets are atomically reset to zero.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+ const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_init_rcu() - initialize RCU-protected histogram
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds.
+ *
+ * Each struct histogram_rcu must be initialized by histogram_init_rcu() at
+ * least once before it is valid to call any other functions on it. A struct
+ * histogram_rcu that has been previously initialized cannot be initialized
+ * again, unless it has been subsequently destroyed by histogram_destroy_rcu().
+ *
+ * The semantics that apply to @thresholds are the same as for
+ * histogram_alloc(), with one exception: @thresholds may be NULL iff
+ * @nr_thresholds is 0. In this case, @hrcu will behave as if it has already
+ * been destroyed (histogram_record_rcu() will no-op and histogram_read_rcu()
+ * will return 0).
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+static inline int histogram_init_rcu(struct histogram_rcu *hrcu,
+				     const u64 *thresholds,
+				     size_t nr_thresholds)
+{
+	/* Begin in the "destroyed" state: NULL histogram pointer. */
+	RCU_INIT_POINTER(hrcu->hist, NULL);
+	if (thresholds || nr_thresholds)
+		return histogram_set_thresholds_rcu(hrcu, thresholds,
+						    nr_thresholds);
+	/* NULL thresholds with a zero count: valid, stays "destroyed". */
+	return 0;
+}
+
+void histogram_destroy_rcu_cb(struct rcu_head *rcu);
+
+/**
+ * histogram_destroy_rcu() - destroy RCU-protected histogram
+ * @hrcu: histogram
+ *
+ * After histogram_destroy_rcu() has been called on a @hrcu, it is valid to call
+ * histogram_init_rcu() on it again.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ */
+static inline void histogram_destroy_rcu(struct histogram_rcu *hrcu)
+{
+	/* raw deref is safe: callers serialize init/set/destroy (see above) */
+	struct histogram *hist = rcu_dereference_raw(hrcu->hist);
+
+	/* unpublish first; readers that still hold the old pointer are
+	 * protected until the grace period below */
+	RCU_INIT_POINTER(hrcu->hist, NULL);
+	if (likely(hist))
+		call_rcu(&hist->rcu, histogram_destroy_rcu_cb);
+}
+
+#endif /* _LINUX_HISTOGRAM_H */
@@ -648,6 +648,9 @@ config OBJAGG
config STRING_SELFTEST
tristate "Test string functions"
+config HISTOGRAM
+ bool
+
endmenu
config GENERIC_IOREMAP
@@ -248,6 +248,8 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o
obj-$(CONFIG_FONT_SUPPORT) += fonts/
+obj-$(CONFIG_HISTOGRAM) += histogram.o
+
hostprogs := gen_crc32table
hostprogs += gen_crc64table
clean-files := crc32table.h
new file mode 100644
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/histogram.h>
+#include <linux/overflow.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+void histogram_record(struct histogram *hist, u64 val, u64 count)
+{
+	size_t lo = 0, hi = hist->nr_thresholds - 1;
+
+	/*
+	 * Binary search for the first threshold >= val.
+	 * Invariant: the correct bucket index is always within [lo, hi].
+	 */
+	while (lo < hi) {
+		size_t mid = lo + (hi - lo) / 2;
+
+		if (hist->thresholds[mid] < val)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+	/* _notrace because histogram_record may be called from tracing */
+	preempt_disable_notrace();
+	__this_cpu_add(hist->buckets[lo], count);
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(histogram_record);
+
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets)
+{
+	size_t nr = hist->nr_thresholds;
+	size_t b;
+	int cpu;
+
+	memset(buckets, 0, nr * sizeof(*buckets));
+	/*
+	 * Iterate over possible (not merely online) CPUs so that records
+	 * made on CPUs that have since been hot-removed are still counted.
+	 */
+	for_each_possible_cpu(cpu) {
+		for (b = 0; b < nr; b++)
+			buckets[b] += per_cpu(hist->buckets[b], cpu);
+	}
+}
+EXPORT_SYMBOL_GPL(histogram_read_buckets);
+
+/*
+ * Validate a thresholds array: non-NULL, at least one element, strictly
+ * ascending (no duplicates), and terminated by ~(u64)0 so that every
+ * possible sample value maps to some bucket.
+ *
+ * Returns 0 if valid, -EINVAL or -EFAULT otherwise.
+ */
+static int histogram_check_thresholds(const u64 *thresholds,
+				      size_t nr_thresholds)
+{
+	size_t i;	/* must match nr_thresholds; unsigned int truncates */
+
+	if (!nr_thresholds)
+		return -EINVAL;
+	if (!thresholds)
+		return -EFAULT;
+	for (i = 1; i < nr_thresholds; i++)
+		if (thresholds[i - 1] >= thresholds[i])
+			return -EINVAL;
+	if (thresholds[nr_thresholds - 1] != ~0ULL)
+		return -EINVAL;
+	return 0;
+}
+
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds)
+{
+	struct histogram *hist;
+	size_t hist_size;
+	int ret;
+
+	ret = histogram_check_thresholds(thresholds, nr_thresholds);
+	if (ret)
+		return ERR_PTR(ret);
+	/*
+	 * struct_size()/array_size() saturate to SIZE_MAX instead of
+	 * wrapping, so an enormous nr_thresholds makes the allocation
+	 * fail rather than silently under-allocate.
+	 */
+	hist_size = struct_size(hist, thresholds, nr_thresholds);
+	hist = kmalloc(ALIGN(hist_size, cache_line_size()), GFP_KERNEL);
+	if (!hist)
+		return ERR_PTR(-ENOMEM);
+	/* per-CPU counters start zeroed by the percpu allocator */
+	hist->buckets = __alloc_percpu(array_size(nr_thresholds,
+						  sizeof(*hist->buckets)),
+				       __alignof__(*hist->buckets));
+	if (!hist->buckets) {
+		kfree(hist);
+		return ERR_PTR(-ENOMEM);
+	}
+	hist->nr_thresholds = nr_thresholds;
+	memcpy(hist->thresholds, thresholds, nr_thresholds * sizeof(u64));
+	return hist;
+}
+EXPORT_SYMBOL_GPL(histogram_alloc);
+
+void histogram_free(struct histogram *hist)
+{
+	/* NULL is tolerated, mirroring kfree() */
+	if (hist) {
+		free_percpu(hist->buckets);
+		kfree(hist);
+	}
+}
+EXPORT_SYMBOL_GPL(histogram_free);
+
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+			   u64 *thresholds, size_t nr_thresholds)
+{
+	const struct histogram *hist;
+	ssize_t ret;
+
+	rcu_read_lock_sched();
+	hist = rcu_dereference_sched(hrcu->hist);
+	if (!hist) {
+		/* destroyed (or initialized empty): zero thresholds */
+		ret = 0;
+	} else if (nr_thresholds < hist->nr_thresholds) {
+		/* caller's arrays are too small; report the size needed */
+		ret = -hist->nr_thresholds;
+	} else {
+		if (buckets)
+			histogram_read_buckets(hist, buckets);
+		if (thresholds)
+			memcpy(thresholds, hist->thresholds,
+			       hist->nr_thresholds * sizeof(u64));
+		ret = hist->nr_thresholds;
+	}
+	rcu_read_unlock_sched();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(histogram_read_rcu);
+
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+				 const u64 *thresholds, size_t nr_thresholds)
+{
+	/* raw deref is safe: callers serialize writers (see header doc) */
+	struct histogram *old_hist = rcu_dereference_raw(hrcu->hist);
+	struct histogram *new_hist = histogram_alloc(thresholds, nr_thresholds);
+
+	if (IS_ERR(new_hist))
+		return PTR_ERR(new_hist);
+	/* publish the new (zero-bucket) histogram before reclaiming */
+	rcu_assign_pointer(hrcu->hist, new_hist);
+	if (old_hist) {
+		/*
+		 * NOTE(review): readers use rcu_read_lock_sched(); this
+		 * relies on synchronize_rcu() also waiting for sched-RCU
+		 * readers (true after the RCU flavor consolidation) --
+		 * confirm against the target kernel version.
+		 */
+		synchronize_rcu();
+		histogram_free(old_hist);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(histogram_set_thresholds_rcu);
+
+/* RCU callback: reclaim a histogram once all sched readers have finished */
+void histogram_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	struct histogram *hist = container_of(rcu, struct histogram, rcu);
+
+	histogram_free(hist);
+}
+EXPORT_SYMBOL_GPL(histogram_destroy_rcu_cb);
struct histogram provides a histogram that counts u64 samples in arbitrary user-configurable buckets, optimized for efficient concurrent recording. This is a squashed, refactored, and modified version of a previously-internal implementation. Thanks to the following individuals for portions of the implementation: Jamie Liu <jamieliu@google.com> - Original implementation Yu Zhao <yuzhao@google.com> - Code cleanups + simplification Signed-off-by: Axel Rasmussen <axelrasmussen@google.com> --- include/linux/histogram.h | 270 ++++++++++++++++++++++++++++++++++++++ lib/Kconfig | 3 + lib/Makefile | 2 + lib/histogram.c | 157 ++++++++++++++++++++++ 4 files changed, 432 insertions(+) create mode 100644 include/linux/histogram.h create mode 100644 lib/histogram.c