@@ -639,6 +639,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
+ tg->bytes_skipped[rw] = 0;
+ tg->io_skipped[rw] = 0;
/*
* Previous slice has expired. We must have trimmed it after last
@@ -656,12 +658,17 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->slice_end[rw], jiffies);
}
-static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
+static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
+ bool clear_skipped)
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+ if (clear_skipped) {
+ tg->bytes_skipped[rw] = 0;
+ tg->io_skipped[rw] = 0;
+ }
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
@@ -783,6 +790,41 @@ static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
}
+static void __tg_update_skipped(struct throtl_grp *tg, bool rw)
+{
+ unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
+ u64 bps_limit = tg_bps_limit(tg, rw);
+ u32 iops_limit = tg_iops_limit(tg, rw);
+
+ /*
+ * Following calculation won't overflow as long as bios that are
+ * dispatched later won't preempt already throttled bios. Even if such
+ * overflow do happen, there should be no problem because we are using
+ * unsigned here, and bytes_skipped/io_skipped will be updated
+ * correctly.
+ */
+ if (bps_limit != U64_MAX)
+ tg->bytes_skipped[rw] +=
+ calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
+ tg->bytes_disp[rw];
+ if (iops_limit != UINT_MAX)
+ tg->io_skipped[rw] +=
+ calculate_io_allowed(iops_limit, jiffy_elapsed) -
+ tg->io_disp[rw];
+}
+
+static void tg_update_skipped(struct throtl_grp *tg)
+{
+ if (tg->service_queue.nr_queued[READ])
+ __tg_update_skipped(tg, READ);
+ if (tg->service_queue.nr_queued[WRITE])
+ __tg_update_skipped(tg, WRITE);
+
+ throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
+ tg->bytes_skipped[READ], tg->bytes_skipped[WRITE],
+ tg->io_skipped[READ], tg->io_skipped[WRITE]);
+}
+
static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
u32 iops_limit, unsigned long *wait)
{
@@ -800,7 +842,8 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
/* Round up to the next throttle slice, wait time must be nonzero */
jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
- io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd);
+ io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
+ tg->io_skipped[rw];
if (tg->io_disp[rw] + 1 <= io_allowed) {
if (wait)
*wait = 0;
@@ -837,7 +880,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = tg->td->throtl_slice;
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
- bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+ tg->bytes_skipped[rw];
if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (wait)
*wait = 0;
@@ -898,7 +942,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
* slice and it should be extended instead.
*/
if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
- throtl_start_new_slice(tg, rw);
+ throtl_start_new_slice(tg, rw, true);
else {
if (time_before(tg->slice_end[rw],
jiffies + tg->td->throtl_slice))
@@ -1327,8 +1371,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
* that a group's limit are dropped suddenly and we don't want to
* account recently dispatched IO with new low rate.
*/
- throtl_start_new_slice(tg, READ);
- throtl_start_new_slice(tg, WRITE);
+ throtl_start_new_slice(tg, READ, false);
+ throtl_start_new_slice(tg, WRITE, false);
if (tg->flags & THROTL_TG_PENDING) {
tg_update_disptime(tg);
@@ -1356,6 +1400,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
v = U64_MAX;
tg = blkg_to_tg(ctx.blkg);
+ tg_update_skipped(tg);
if (is_u64)
*(u64 *)((void *)tg + of_cft(of)->private) = v;
@@ -1542,6 +1587,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
return ret;
tg = blkg_to_tg(ctx.blkg);
+ tg_update_skipped(tg);
v[0] = tg->bps_conf[READ][index];
v[1] = tg->bps_conf[WRITE][index];
@@ -115,6 +115,15 @@ struct throtl_grp {
uint64_t bytes_disp[2];
/* Number of bio's dispatched in current slice */
unsigned int io_disp[2];
+ /*
+ * The following two fields are used to calculate new wait time for
+ * throttled bio when new configuration is submmited.
+ *
+ * Number of bytes will be skipped in current slice
+ */
+ uint64_t bytes_skipped[2];
+ /* Number of bio will be skipped in current slice */
+ unsigned int io_skipped[2];
unsigned long last_low_overflow_time[2];