[RFC,1/3] writeback: add parallel writeback infrastructure

Message ID 20250212103634.448437-2-kundan.kumar@samsung.com (mailing list archive)
State New
Series [RFC,1/3] writeback: add parallel writeback infrastructure

Commit Message

Kundan Kumar Feb. 12, 2025, 10:36 a.m. UTC
This patch introduces infrastructure for parallel writeback.

- Writeback context list and index:
  wb_ctx_list: An array holding the NR_WB_CTX writeback contexts of a
  bdi_writeback.
  wb_idx: An index into wb_ctx_list used to assign writeback contexts
  to filesystems (a possible assignment scheme is sketched after this
  list).

- Inode lists:
  Each writeback context maintains its own inode lists, mirroring the
  existing b_* lists:

  b_dirty -> pctx_b_dirty
  b_io -> pctx_b_io
  b_dirty_time -> pctx_b_dirty_time
  b_more_io -> pctx_b_more_io

- Per-writeback context work:
  Each writeback context embeds its own delayed work item, pctx_dwork,
  so that per-context writeback can be handled by multiple worker
  threads concurrently.

- Helper functions:
  A set of helper functions, ctx_b_*_list(), is introduced to retrieve
  the inode list associated with a specific writeback context (a usage
  sketch follows the patch below).

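For illustration only (not part of this patch; the helper name
wb_assign_ctx() is hypothetical), wb_idx could be used to hand out
contexts round-robin, e.g. when a filesystem is first attached to the
bdi_writeback:

static struct wb_ctx *wb_assign_ctx(struct bdi_writeback *wb)
{
	struct wb_ctx *ctx = &wb->wb_ctx_list[wb->wb_idx];

	/*
	 * Advance the cursor so the next caller gets the next context;
	 * callers would need suitable locking around the wb_idx update.
	 */
	wb->wb_idx = (wb->wb_idx + 1) % NR_WB_CTX;
	return ctx;
}
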
Signed-off-by: Kundan Kumar <kundan.kumar@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
---
 include/linux/backing-dev-defs.h | 61 ++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 | 21 ++++++++++-
 2 files changed, 81 insertions(+), 1 deletion(-)

Patch

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..df627783e879 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -37,6 +37,7 @@  enum wb_stat_item {
 };
 
 #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
+#define NR_WB_CTX 8
 
 /*
  * why some writeback work was initiated
@@ -80,6 +81,31 @@  struct wb_completion {
 #define DEFINE_WB_COMPLETION(cmpl, bdi)	\
 	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
 
+struct wb_ctx {
+	struct delayed_work pctx_dwork;
+	struct list_head pctx_b_dirty;
+	struct list_head pctx_b_io;
+	struct list_head pctx_b_more_io;
+	struct list_head pctx_b_dirty_time;
+	struct bdi_writeback *b_wb;
+	unsigned long last_old_flush;	/* last old data flush */
+	unsigned long state;
+	unsigned long bw_time_stamp;	/* last time write bw is updated */
+	unsigned long dirtied_stamp;
+	unsigned long written_stamp;	/* pages written at bw_time_stamp */
+	unsigned long write_bandwidth;	/* the estimated write bandwidth */
+	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */
+
+	/*
+	 * The base dirty throttle rate, re-calculated on every 200ms.
+	 * All the bdi tasks' dirty rate will be curbed under it.
+	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
+	 * in small steps and is much more smooth/stable than the latter.
+	 */
+	unsigned long dirty_ratelimit;
+	unsigned long balanced_dirty_ratelimit;
+};
+
 /*
  * Each wb (bdi_writeback) can perform writeback operations, is measured
  * and throttled, independently.  Without cgroup writeback, each bdi
@@ -143,6 +169,8 @@  struct bdi_writeback {
 
 	struct list_head bdi_node;	/* anchored at bdi->wb_list */
 
+	int wb_idx;
+	struct wb_ctx wb_ctx_list[NR_WB_CTX];
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct percpu_ref refcnt;	/* used only for !root wb's */
 	struct fprop_local_percpu memcg_completions;
@@ -208,6 +236,39 @@  struct wb_lock_cookie {
 	unsigned long flags;
 };
 
+static inline struct wb_ctx *ctx_wb_struct(struct bdi_writeback *wb, int ctx_id)
+{
+	return &wb->wb_ctx_list[ctx_id];
+}
+
+static inline struct list_head *ctx_b_dirty_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_dirty;
+}
+
+static inline struct list_head *ctx_b_dirty_time_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_dirty_time;
+}
+
+static inline struct list_head *ctx_b_io_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_io;
+}
+
+static inline struct list_head *ctx_b_more_io_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_more_io;
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 /**
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e61bbb1bd622..fc072e9fe42c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -515,7 +515,8 @@  static void wb_update_bandwidth_workfn(struct work_struct *work)
 static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 		   gfp_t gfp)
 {
-	int err;
+	int i, err;
+	struct wb_ctx *p_wb_ctx;
 
 	memset(wb, 0, sizeof(*wb));
 
@@ -533,12 +534,30 @@  static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	wb->dirty_ratelimit = INIT_BW;
 	wb->write_bandwidth = INIT_BW;
 	wb->avg_write_bandwidth = INIT_BW;
+	wb->wb_idx = 0;
 
 	spin_lock_init(&wb->work_lock);
 	INIT_LIST_HEAD(&wb->work_list);
 	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
 	INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
 
+	for (i = 0; i < NR_WB_CTX; i++) {
+		p_wb_ctx = &wb->wb_ctx_list[i];
+		p_wb_ctx->b_wb = wb;
+		p_wb_ctx->last_old_flush = jiffies;
+		p_wb_ctx->bw_time_stamp = jiffies;
+		p_wb_ctx->balanced_dirty_ratelimit = INIT_BW;
+		p_wb_ctx->dirty_ratelimit = INIT_BW;
+		p_wb_ctx->write_bandwidth = INIT_BW;
+		p_wb_ctx->avg_write_bandwidth = INIT_BW;
+
+		INIT_LIST_HEAD(ctx_b_dirty_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_dirty_time_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_io_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_more_io_list(wb, i));
+
+		INIT_DELAYED_WORK(&p_wb_ctx->pctx_dwork, wb_workfn);
+	}
 	err = fprop_local_init_percpu(&wb->completions, gfp);
 	if (err)
 		return err;
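
A brief usage sketch, again not part of the patch: a caller could walk
every writeback context through the ctx_b_*_list() helpers, for
instance to test whether any per-context list still has inodes queued.
The function name wb_has_ctx_dirty_io() is made up for this example.

static bool wb_has_ctx_dirty_io(struct bdi_writeback *wb)
{
	int i;

	/* check the dirty, io and more_io lists of every context */
	for (i = 0; i < NR_WB_CTX; i++) {
		if (!list_empty(ctx_b_dirty_list(wb, i)) ||
		    !list_empty(ctx_b_io_list(wb, i)) ||
		    !list_empty(ctx_b_more_io_list(wb, i)))
			return true;
	}
	return false;
}

Keeping the per-context lists behind these accessors means later
patches can change the layout without touching every call site.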