diff mbox

[RFC] blk-throttle: add burst allowance

Message ID 20171026235404.62336-1-khazhy@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Khazhy Kumykov Oct. 26, 2017, 11:54 p.m. UTC
Allows configuration additional bytes or ios before a throttle is
triggered. Slice end is extended to cover expended allowance recovery
time.

Usage would be e.g. per device to allow users to take up to X bytes/ios
at full speed, but be limited to Y bps/iops with sustained usage.

Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
---
 block/Kconfig        |  11 +++
 block/blk-throttle.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 186 insertions(+), 10 deletions(-)
diff mbox

Patch

diff --git a/block/Kconfig b/block/Kconfig
index 3ab42bbb06d5..16545caa7fc9 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -127,6 +127,17 @@  config BLK_DEV_THROTTLING_LOW
 
 	Note, this is an experimental interface and could be changed someday.
 
+config BLK_DEV_THROTTLING_BURST
+    bool "Block throttling .burst allowance interface"
+    depends on BLK_DEV_THROTTLING
+    default n
+    ---help---
+    Add .burst allowance for block throttling. Burst allowance allows for
+    additional unthrottled usage, while still limiting speed for sustained
+    usage.
+
+    If in doubt, say N.
+
 config BLK_CMDLINE_PARSER
 	bool "Block device command line partition parser"
 	default n
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index d80c3f0144c5..e09ec11e9c5f 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -156,6 +156,11 @@  struct throtl_grp {
 	/* Number of bio's dispatched in current slice */
 	unsigned int io_disp[2];
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	uint64_t bytes_burst_conf[2];
+	unsigned int io_burst_conf[2];
+#endif
+
 	unsigned long last_low_overflow_time[2];
 
 	uint64_t last_bytes_disp[2];
@@ -506,6 +511,12 @@  static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
 	tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
 	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	tg->bytes_burst_conf[READ] = 0;
+	tg->bytes_burst_conf[WRITE] = 0;
+	tg->io_burst_conf[READ] = 0;
+	tg->io_burst_conf[WRITE] = 0;
+#endif
 	/* LIMIT_LOW will have default value 0 */
 
 	tg->latency_target = DFL_LATENCY_TARGET;
@@ -799,6 +810,26 @@  static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
 		   tg->slice_end[rw], jiffies);
 }
 
+/*
+ * When current slice should end.
+ *
+ * With CONFIG_BLK_DEV_THROTTLING_BURST, we will wait longer than min_wait
+ * for slice to recover used burst allowance. (*_disp -> 0). Setting slice_end
+ * before this would result in tg receiving additional burst allowance.
+ */
+static inline unsigned long throtl_slice_wait(struct throtl_grp *tg, bool rw,
+						unsigned long min_wait)
+{
+	unsigned long bytes_wait = 0, io_wait = 0;
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	if (tg->bytes_burst_conf[rw])
+		bytes_wait = (tg->bytes_disp[rw] * HZ) / tg_bps_limit(tg, rw);
+	if (tg->io_burst_conf[rw])
+		io_wait = (tg->io_disp[rw] * HZ) / tg_iops_limit(tg, rw);
+#endif
+	return jiffies + max(min_wait, max(bytes_wait, io_wait));
+}
+
 static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
 					unsigned long jiffy_end)
 {
@@ -848,7 +879,8 @@  static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 	 * is bad because it does not allow new slice to start.
 	 */
 
-	throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
+	throtl_set_slice_end(tg, rw,
+			     throtl_slice_wait(tg, rw, tg->td->throtl_slice));
 
 	time_elapsed = jiffies - tg->slice_start[rw];
 
@@ -888,7 +920,7 @@  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 				  unsigned long *wait)
 {
 	bool rw = bio_data_dir(bio);
-	unsigned int io_allowed;
+	unsigned int io_allowed, io_disp;
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 	u64 tmp;
 
@@ -907,6 +939,17 @@  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 	 * have been trimmed.
 	 */
 
+	io_disp = tg->io_disp[rw];
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	if (tg->io_disp[rw] < tg->io_burst_conf[rw]) {
+		if (wait)
+			*wait = 0;
+		return true;
+	}
+	io_disp -= tg->io_burst_conf[rw];
+#endif
+
 	tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd;
 	do_div(tmp, HZ);
 
@@ -915,14 +958,14 @@  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 	else
 		io_allowed = tmp;
 
-	if (tg->io_disp[rw] + 1 <= io_allowed) {
+	if (io_disp + 1 <= io_allowed) {
 		if (wait)
 			*wait = 0;
 		return true;
 	}
 
 	/* Calc approx time to dispatch */
-	jiffy_wait = ((tg->io_disp[rw] + 1) * HZ) / tg_iops_limit(tg, rw) + 1;
+	jiffy_wait = ((io_disp + 1) * HZ) / tg_iops_limit(tg, rw) + 1;
 
 	if (jiffy_wait > jiffy_elapsed)
 		jiffy_wait = jiffy_wait - jiffy_elapsed;
@@ -938,7 +981,7 @@  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 				 unsigned long *wait)
 {
 	bool rw = bio_data_dir(bio);
-	u64 bytes_allowed, extra_bytes, tmp;
+	u64 bytes_allowed, extra_bytes, bytes_disp, tmp;
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
@@ -950,18 +993,28 @@  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 
 	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
 
+	bytes_disp = tg->bytes_disp[rw];
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	if (tg->bytes_disp[rw] < tg->bytes_burst_conf[rw]) {
+		if (wait)
+			*wait = 0;
+		return true;
+	}
+	bytes_disp -= tg->bytes_burst_conf[rw];
+#endif
+
 	tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd;
 	do_div(tmp, HZ);
 	bytes_allowed = tmp;
 
-	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
+	if (bytes_disp + bio_size <= bytes_allowed) {
 		if (wait)
 			*wait = 0;
 		return true;
 	}
 
 	/* Calc approx time to dispatch */
-	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
+	extra_bytes = bytes_disp + bio_size - bytes_allowed;
 	jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
 
 	if (!jiffy_wait)
@@ -1017,7 +1070,7 @@  static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 		if (time_before(tg->slice_end[rw],
 		    jiffies + tg->td->throtl_slice))
 			throtl_extend_slice(tg, rw,
-				jiffies + tg->td->throtl_slice);
+			    throtl_slice_wait(tg, rw, tg->td->throtl_slice));
 	}
 
 	if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
@@ -1032,8 +1085,10 @@  static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	if (wait)
 		*wait = max_wait;
 
-	if (time_before(tg->slice_end[rw], jiffies + max_wait))
-		throtl_extend_slice(tg, rw, jiffies + max_wait);
+	if (time_before(tg->slice_end[rw],
+			throtl_slice_wait(tg, rw, max_wait)))
+		throtl_extend_slice(tg, rw,
+				    throtl_slice_wait(tg, rw, max_wait));
 
 	return 0;
 }
@@ -1704,6 +1759,108 @@  static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	return ret ?: nbytes;
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+static u64 tg_prfill_burst(struct seq_file *sf, struct blkg_policy_data *pd,
+			   int data)
+{
+	struct throtl_grp *tg = pd_to_tg(pd);
+	const char *dname = blkg_dev_name(pd->blkg);
+	char bufs[4][21];
+
+	if (!dname)
+		return 0;
+
+	if (tg->bytes_burst_conf[READ] == 0 &&
+	    tg->bytes_burst_conf[WRITE] == 0 &&
+	    tg->io_burst_conf[READ] == 0 &&
+	    tg->io_burst_conf[WRITE] == 0)
+		return 0;
+
+	snprintf(bufs[0], sizeof(bufs[0]), "%llu",
+		tg->bytes_burst_conf[READ]);
+	snprintf(bufs[1], sizeof(bufs[1]), "%llu",
+		tg->bytes_burst_conf[WRITE]);
+	snprintf(bufs[2], sizeof(bufs[2]), "%u",
+		tg->io_burst_conf[READ]);
+	snprintf(bufs[3], sizeof(bufs[3]), "%u",
+		tg->io_burst_conf[WRITE]);
+
+	seq_printf(sf, "%s brbyte=%s bwbyte=%s brio=%s bwio=%s\n",
+		   dname, bufs[0], bufs[1], bufs[2], bufs[3]);
+	return 0;
+}
+
+static int tg_print_burst(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_burst,
+			  &blkcg_policy_throtl, 0, false);
+	return 0;
+}
+
+static ssize_t tg_set_burst(struct kernfs_open_file *of,
+			  char *buf, size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkg_conf_ctx ctx;
+	struct throtl_grp *tg;
+	u64 v[4];
+	int ret;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+	if (ret)
+		return ret;
+
+	tg = blkg_to_tg(ctx.blkg);
+
+	v[0] = tg->bytes_burst_conf[READ];
+	v[1] = tg->bytes_burst_conf[WRITE];
+	v[2] = tg->io_burst_conf[READ];
+	v[3] = tg->io_burst_conf[WRITE];
+
+	while (true) {
+		char tok[28];	/* bwbyte=18446744073709551616 */
+		char *p;
+		u64 val = U64_MAX;
+		int len;
+
+		if (sscanf(ctx.body, "%27s%n", tok, &len) != 1)
+			break;
+		if (tok[0] == '\0')
+			break;
+		ctx.body += len;
+
+		ret = -EINVAL;
+		p = tok;
+		strsep(&p, "=");
+		if (!p || (kstrtoull(p, 0, &val) != 0 && strcmp(p, "max")))
+			goto out_finish;
+
+		ret = -EINVAL;
+		if (!strcmp(tok, "brbyte"))
+			v[0] = val;
+		else if (!strcmp(tok, "bwbyte"))
+			v[1] = val;
+		else if (!strcmp(tok, "brio"))
+			v[2] = min_t(u64, val, UINT_MAX);
+		else if (!strcmp(tok, "bwio"))
+			v[3] = min_t(u64, val, UINT_MAX);
+		else
+			goto out_finish;
+	}
+
+	tg->bytes_burst_conf[READ] = v[0];
+	tg->bytes_burst_conf[WRITE] = v[1];
+	tg->io_burst_conf[READ] = v[2];
+	tg->io_burst_conf[WRITE] = v[3];
+
+	tg_conf_updated(tg, false);
+	ret = 0;
+out_finish:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+#endif
+
 static struct cftype throtl_files[] = {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	{
@@ -1713,6 +1870,14 @@  static struct cftype throtl_files[] = {
 		.write = tg_set_limit,
 		.private = LIMIT_LOW,
 	},
+#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING_BURST
+	{
+		.name = "burst",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = tg_print_burst,
+		.write = tg_set_burst,
+	},
 #endif
 	{
 		.name = "max",