@@ -8,6 +8,8 @@
#ifndef _DAMON_H_
#define _DAMON_H_
+#include <linux/mutex.h>
+#include <linux/time64.h>
#include <linux/types.h>
/**
@@ -23,11 +25,13 @@ struct damon_addr_range {
/**
* struct damon_region - Represents a monitoring target region.
* @ar: The address range of the region.
+ * @sampling_addr: Address of the sample for the next access check.
* @nr_accesses: Access frequency of this region.
* @list: List head for siblings.
*/
struct damon_region {
struct damon_addr_range ar;
+ unsigned long sampling_addr;
unsigned int nr_accesses;
struct list_head list;
};
@@ -50,12 +54,130 @@ struct damon_target {
struct list_head list;
};
+struct damon_ctx;
+
/**
- * struct damon_ctx - Represents a context for each monitoring.
+ * struct damon_primitive Monitoring primitives for given use cases.
+ *
+ * @init_target_regions: Constructs initial monitoring target regions.
+ * @prepare_access_checks: Prepares next access check of target regions.
+ * @check_accesses: Checks the access of target regions.
+ * @target_valid: Determine if the target is valid.
+ * @cleanup: Cleans up the context.
+ *
+ * DAMON can be extended for various address spaces and usages. For this,
+ * users should register the low level primitives for their target address
+ * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread
+ * calls @init_target_regions before starting the monitoring and
+ * @prepare_access_checks, @check_accesses, and @target_valid for each
+ * @sample_interval.
+ *
+ * @init_target_regions should construct proper monitoring target regions and
+ * link those to the DAMON context struct.
+ * @prepare_access_checks should manipulate the monitoring regions to be
+ * prepare for the next access check.
+ * @check_accesses should check the accesses to each region that made after the
+ * last preparation and update the `->nr_accesses` of each region. It should
+ * also return max &damon_region.nr_accesses that made as a result of its
+ * update.
+ * @target_valid should check whether the target is still valid for the
+ * monitoring.
+ * @cleanup is called from @kdamond just before its termination. After this
+ * call, only @kdamond_lock and @kdamond will be touched.
+ */
+struct damon_primitive {
+ void (*init_target_regions)(struct damon_ctx *context);
+ void (*prepare_access_checks)(struct damon_ctx *context);
+ unsigned int (*check_accesses)(struct damon_ctx *context);
+ bool (*target_valid)(struct damon_target *target);
+ void (*cleanup)(struct damon_ctx *context);
+};
+
+/*
+ * struct damon_callback Monitoring events notification callbacks.
+ *
+ * @before_start: Called before starting the monitoring.
+ * @after_sampling: Called after each sampling.
+ * @after_aggregation: Called after each aggregation.
+ * @before_terminate: Called before terminating the monitoring.
+ * @private: User private data.
+ *
+ * The monitoring thread (&damon_ctx->kdamond) calls @before_start and
+ * @before_terminate just before starting the monitoring and just before
+ * finishing the monitoring. Therefore, those are good places for installing
+ * and cleaning @private.
+ *
+ * The monitoring thread calls @after_sampling and @after_aggregation for each
+ * of the sampling intervals and aggregation intervals, respectively.
+ * Therefore, users can safely access the monitoring results via
+ * &damon_ctx.targets_list without additional protection of
+ * damon_ctx.kdamond_lock. For the reason, users are recommended to use these
+ * callback for the accesses to the results.
+ *
+ * If any callback returns non-zero, monitoring stops.
+ */
+struct damon_callback {
+ void *private;
+
+ int (*before_start)(struct damon_ctx *context);
+ int (*after_sampling)(struct damon_ctx *context);
+ int (*after_aggregation)(struct damon_ctx *context);
+ int (*before_terminate)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring. This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval: The time between access samplings.
+ * @aggr_interval: The time between monitor results aggregations.
+ * @nr_regions: The number of monitoring regions.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not. It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time. All time intervals are in
+ * micro-seconds.
+ *
+ * @kdamond: Kernel thread who does the monitoring.
+ * @kdamond_stop: Notifies whether kdamond should stop.
+ * @kdamond_lock: Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring context, one kernel thread for the monitoring is
+ * created. The pointer to the thread is stored in @kdamond.
+ *
+ * Once started, the monitoring thread runs until explicitly required to be
+ * terminated or every monitoring target is invalid. The validity of the
+ * targets is checked via the @target_valid callback. The termination can also
+ * be explicitly requested by writing non-zero to @kdamond_stop. The thread
+ * sets @kdamond to NULL when it terminates. Therefore, users can know whether
+ * the monitoring is ongoing or terminated by reading @kdamond. Reads and
+ * writes to @kdamond and @kdamond_stop from outside of the monitoring thread
+ * must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock. Accesses to other fields must be protected by themselves.
+ *
* @targets_list: Head of monitoring targets (&damon_target) list.
+ *
+ * @primitive: Set of monitoring primitives for given use cases.
+ * @callback: Set of callbacks for monitoring events notifications.
*/
struct damon_ctx {
+ unsigned long sample_interval;
+ unsigned long aggr_interval;
+ unsigned long nr_regions;
+
+ struct timespec64 last_aggregation;
+
+ struct task_struct *kdamond;
+ bool kdamond_stop;
+ struct mutex kdamond_lock;
+
struct list_head targets_list; /* 'damon_target' objects */
+
+ struct damon_primitive primitive;
+ struct damon_callback callback;
};
#define damon_next_region(r) \
@@ -90,6 +212,15 @@ void damon_free_target(struct damon_target *t);
void damon_destroy_target(struct damon_target *t);
unsigned int damon_nr_regions(struct damon_target *t);
+int damon_set_targets(struct damon_ctx *ctx,
+ unsigned long *ids, ssize_t nr_ids);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+ unsigned long aggr_int, unsigned long nr_reg);
+
+int damon_nr_running_ctxs(void);
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+
#endif /* CONFIG_DAMON */
#endif
@@ -8,12 +8,20 @@
#define pr_fmt(fmt) "damon: " fmt
#include <linux/damon.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
#include <linux/slab.h>
+/* Minimal region size. Every damon_region is aligned by this. */
+#define MIN_REGION PAGE_SIZE
+
/*
* Functions and macros for DAMON data structures
*/
+static DEFINE_MUTEX(damon_lock);
+static int nr_running_ctxs;
+
/*
* Construct a damon_region struct
*
@@ -119,3 +127,328 @@ unsigned int damon_nr_regions(struct damon_target *t)
return nr_regions;
}
+
+/**
+ * damon_set_targets() - Set monitoring targets.
+ * @ctx: monitoring context
+ * @ids: array of target ids
+ * @nr_ids: number of entries in @ids
+ *
+ * This function should not be called while the kdamond is running.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_targets(struct damon_ctx *ctx,
+ unsigned long *ids, ssize_t nr_ids)
+{
+ ssize_t i;
+ struct damon_target *t, *next;
+
+ damon_for_each_target_safe(t, next, ctx)
+ damon_destroy_target(t);
+
+ for (i = 0; i < nr_ids; i++) {
+ t = damon_new_target(ids[i]);
+ if (!t) {
+ pr_err("Failed to alloc damon_target\n");
+ return -ENOMEM;
+ }
+ damon_add_target(ctx, t);
+ }
+
+ return 0;
+}
+
+/**
+ * damon_set_attrs() - Set attributes for the monitoring.
+ * @ctx: monitoring context
+ * @sample_int: time interval between samplings
+ * @aggr_int: time interval between aggregations
+ * @nr_reg: number of regions
+ *
+ * This function should not be called while the kdamond is running.
+ * Every time interval is in micro-seconds.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+ unsigned long aggr_int, unsigned long nr_reg)
+{
+ if (nr_reg < 3) {
+ pr_err("nr_regions (%lu) must be at least 3\n",
+ nr_reg);
+ return -EINVAL;
+ }
+
+ ctx->sample_interval = sample_int;
+ ctx->aggr_interval = aggr_int;
+ ctx->nr_regions = nr_reg;
+
+ return 0;
+}
+
+static bool damon_kdamond_running(struct damon_ctx *ctx)
+{
+ bool running;
+
+ mutex_lock(&ctx->kdamond_lock);
+ running = ctx->kdamond != NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return running;
+}
+
+static int kdamond_fn(void *data);
+
+/*
+ * __damon_start() - Starts monitoring with given context.
+ * @ctx: monitoring context
+ *
+ * This function should be called while damon_lock is hold.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_start(struct damon_ctx *ctx)
+{
+ int err = -EBUSY;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (!ctx->kdamond) {
+ err = 0;
+ ctx->kdamond_stop = false;
+ ctx->kdamond = kthread_create(kdamond_fn, ctx, "kdamond.%d",
+ nr_running_ctxs);
+ if (IS_ERR(ctx->kdamond))
+ err = PTR_ERR(ctx->kdamond);
+ else
+ wake_up_process(ctx->kdamond);
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return err;
+}
+
+/**
+ * damon_start() - Starts the monitorings for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to start monitoring
+ * @nr_ctxs: size of @ctxs
+ *
+ * This function starts a group of monitoring threads for a group of monitoring
+ * contexts. One thread per each context is created and run in parallel. The
+ * caller should handle synchronization between the threads by itself. If a
+ * group of threads that created by other 'damon_start()' call is currently
+ * running, this function does nothing but returns -EBUSY.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
+{
+ int i;
+ int err = 0;
+
+ mutex_lock(&damon_lock);
+ if (nr_running_ctxs) {
+ mutex_unlock(&damon_lock);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < nr_ctxs; i++) {
+ err = __damon_start(ctxs[i]);
+ if (err)
+ break;
+ nr_running_ctxs++;
+ }
+ mutex_unlock(&damon_lock);
+
+ return err;
+}
+
+/*
+ * __damon_stop() - Stops monitoring of given context.
+ * @ctx: monitoring context
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_stop(struct damon_ctx *ctx)
+{
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ ctx->kdamond_stop = true;
+ mutex_unlock(&ctx->kdamond_lock);
+ while (damon_kdamond_running(ctx))
+ usleep_range(ctx->sample_interval,
+ ctx->sample_interval * 2);
+ return 0;
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return -EPERM;
+}
+
+/**
+ * damon_stop() - Stops the monitorings for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to stop monitoring
+ * @nr_ctxs: size of @ctxs
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nr_ctxs; i++) {
+ /* nr_running_ctxs is decremented in kdamond_fn */
+ err = __damon_stop(ctxs[i]);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+/*
+ * Functions for DAMON core logics
+ */
+
+/*
+ * damon_check_reset_time_interval() - Check if a time interval is elapsed.
+ * @baseline: the time to check whether the interval has elapsed since
+ * @interval: the time interval (microseconds)
+ *
+ * See whether the given time interval has passed since the given baseline
+ * time. If so, it also updates the baseline to current time for next check.
+ *
+ * Return: true if the time interval has passed, or false otherwise.
+ */
+static bool damon_check_reset_time_interval(struct timespec64 *baseline,
+ unsigned long interval)
+{
+ struct timespec64 now;
+
+ ktime_get_coarse_ts64(&now);
+ if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
+ interval * 1000)
+ return false;
+ *baseline = now;
+ return true;
+}
+
+/*
+ * Check whether it is time to flush the aggregated information
+ */
+static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
+{
+ return damon_check_reset_time_interval(&ctx->last_aggregation,
+ ctx->aggr_interval);
+}
+
+/*
+ * Reset the aggregated monitoring results ('nr_accesses' of each region).
+ */
+static void kdamond_reset_aggregated(struct damon_ctx *c)
+{
+ struct damon_target *t;
+
+ damon_for_each_target(t, c) {
+ struct damon_region *r;
+
+ damon_for_each_region(r, t)
+ r->nr_accesses = 0;
+ }
+}
+
+/*
+ * Check whether current monitoring should be stopped
+ *
+ * The monitoring is stopped when either the user requested to stop, or all
+ * monitoring targets are invalid.
+ *
+ * Returns true if need to stop current monitoring.
+ */
+static bool kdamond_need_stop(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ bool stop;
+
+ mutex_lock(&ctx->kdamond_lock);
+ stop = ctx->kdamond_stop;
+ mutex_unlock(&ctx->kdamond_lock);
+ if (stop)
+ return true;
+
+ if (!ctx->primitive.target_valid)
+ return false;
+
+ damon_for_each_target(t, ctx) {
+ if (ctx->primitive.target_valid(t))
+ return false;
+ }
+
+ return true;
+}
+
+static void set_kdamond_stop(struct damon_ctx *ctx, bool stop)
+{
+ mutex_lock(&ctx->kdamond_lock);
+ ctx->kdamond_stop = stop;
+ mutex_unlock(&ctx->kdamond_lock);
+}
+
+#define kdamond_call_prmt(ctx, fn) \
+ do { \
+ if (ctx->primitive.fn) \
+ ctx->primitive.fn(ctx); \
+ } while (0)
+
+#define kdamond_callback(ctx, fn) \
+ do { \
+ if (ctx->callback.fn && ctx->callback.fn(ctx)) \
+ set_kdamond_stop(ctx, true); \
+ } while (0)
+
+/*
+ * The monitoring daemon that runs as a kernel thread
+ */
+static int kdamond_fn(void *data)
+{
+ struct damon_ctx *ctx = (struct damon_ctx *)data;
+ struct damon_target *t;
+ struct damon_region *r, *next;
+
+ pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
+
+ kdamond_call_prmt(ctx, init_target_regions);
+ kdamond_callback(ctx, before_start);
+
+ while (!kdamond_need_stop(ctx)) {
+ kdamond_call_prmt(ctx, prepare_access_checks);
+ kdamond_callback(ctx, after_sampling);
+
+ usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+
+ kdamond_call_prmt(ctx, check_accesses);
+
+ if (kdamond_aggregate_interval_passed(ctx)) {
+ kdamond_callback(ctx, after_aggregation);
+ kdamond_reset_aggregated(ctx);
+ }
+ }
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region_safe(r, next, t)
+ damon_destroy_region(r);
+ }
+
+ kdamond_callback(ctx, before_terminate);
+ kdamond_call_prmt(ctx, cleanup);
+
+ pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
+ mutex_lock(&ctx->kdamond_lock);
+ ctx->kdamond = NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+
+ mutex_lock(&damon_lock);
+ nr_running_ctxs--;
+ mutex_unlock(&damon_lock);
+
+ do_exit(0);
+}