@@ -96,3 +96,26 @@ Description:
Indicates the absolute minimum limit of bytes allowed to be
queued on this network device transmit queue. Default value is
0.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_thrs
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Tx completion stall detection threshold in ms. Kernel will
+ guarantee to detect all stalls longer than this threshold but
+ may also detect stalls longer than half of the threshold.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_cnt
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Number of detected Tx completion stalls.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_max
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Longest detected Tx completion stall. Write 0 to clear.
@@ -38,14 +38,22 @@
#ifdef __KERNEL__
+#include <linux/bitops.h>
#include <asm/bug.h>
+#define DQL_HIST_LEN 4
+#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN])
+
struct dql {
/* Fields accessed in enqueue path (dql_queued) */
unsigned int num_queued; /* Total ever queued */
unsigned int adj_limit; /* limit + num_completed */
unsigned int last_obj_cnt; /* Count at last queuing */
+ unsigned long history_head; /* top 58 bits of jiffies */
+ /* stall entries, a bit per entry */
+ unsigned long history[DQL_HIST_LEN];
+
/* Fields accessed only by completion path (dql_completed) */
unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */
@@ -62,6 +70,13 @@ struct dql {
unsigned int max_limit; /* Max limit */
unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */
+
+ /* Stall threshold (in jiffies), defined by user */
+ unsigned short stall_thrs;
+ /* Longest stall detected, reported to user */
+ unsigned short stall_max;
+ unsigned long last_reap; /* Last reap (in jiffies) */
+ unsigned long stall_cnt; /* Number of stalls */
};
/* Set some static maximums */
@@ -74,6 +89,8 @@ struct dql {
*/
static inline void dql_queued(struct dql *dql, unsigned int count)
{
+ unsigned long map, now, now_hi, i;
+
BUG_ON(count > DQL_MAX_OBJECT);
dql->last_obj_cnt = count;
@@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
barrier();
dql->num_queued += count;
+
+ now = jiffies;
+ now_hi = now / BITS_PER_LONG;
+
+ /* The following code set a bit in the ring buffer, where each
+ * bit trackes time the packet was queued. The dql->history buffer
+ * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot
+ */
+ if (unlikely(now_hi != dql->history_head)) {
+ /* About to reuse slots, clear them */
+ for (i = 0; i < DQL_HIST_LEN; i++) {
+ /* Multiplication masks high bits */
+ if (now_hi * BITS_PER_LONG ==
+ (dql->history_head + i) * BITS_PER_LONG)
+ break;
+ DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0;
+ }
+ /* pairs with smp_rmb() in dql_check_stall() */
+ smp_wmb();
+ WRITE_ONCE(dql->history_head, now_hi);
+ }
+
+ /* __set_bit() does not guarantee WRITE_ONCE() semantics */
+ map = DQL_HIST_ENT(dql, now_hi);
+
+ /* Populate the history with an entry (bit) per queued */
+ if (!(map & BIT_MASK(now)))
+ WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
}
/* Returns how many objects can be queued, < 0 indicates over limit. */
@@ -36,6 +36,39 @@ TRACE_EVENT(napi_poll,
__entry->work, __entry->budget)
);
+TRACE_EVENT(dql_stall_detected,
+
+ TP_PROTO(unsigned short thrs, unsigned int len,
+ unsigned long last_reap, unsigned long hist_head,
+ unsigned long now, unsigned long *hist),
+
+ TP_ARGS(thrs, len, last_reap, hist_head, now, hist),
+
+ TP_STRUCT__entry(
+ __field( unsigned short, thrs)
+ __field( unsigned int, len)
+ __field( unsigned long, last_reap)
+ __field( unsigned long, hist_head)
+ __field( unsigned long, now)
+ __array( unsigned long, hist, 4)
+ ),
+
+ TP_fast_assign(
+ __entry->thrs = thrs;
+ __entry->len = len;
+ __entry->last_reap = last_reap;
+ __entry->hist_head = hist_head * BITS_PER_LONG;
+ __entry->now = now;
+ memcpy(__entry->hist, hist, sizeof(entry->hist));
+ ),
+
+ TP_printk("thrs %u len %u last_reap %lu hist_head %lu now %lu hist %016lx %016lx %016lx %016lx",
+ __entry->thrs, __entry->len,
+ __entry->last_reap, __entry->hist_head, __entry->now,
+ __entry->hist[0], __entry->hist[1],
+ __entry->hist[2], __entry->hist[3])
+);
+
#undef NO_DEV
#endif /* _TRACE_NAPI_H */
@@ -10,10 +10,77 @@
#include <linux/dynamic_queue_limits.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <trace/events/napi.h>
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
+static void dql_check_stall(struct dql *dql)
+{
+ unsigned short stall_thrs;
+ unsigned long now;
+
+ stall_thrs = READ_ONCE(dql->stall_thrs);
+ if (!stall_thrs)
+ return;
+
+ now = jiffies;
+ /* Check for a potential stall */
+ if (time_after_eq(now, dql->last_reap + stall_thrs)) {
+ unsigned long hist_head, t, start, end;
+
+ /* We are trying to detect a period of at least @stall_thrs
+ * jiffies without any Tx completions, but during first half
+ * of which some Tx was posted.
+ */
+dqs_again:
+ hist_head = READ_ONCE(dql->history_head);
+ /* pairs with smp_wmb() in dql_queued() */
+ smp_rmb();
+
+ /* Get the previous entry in the ring buffer, which is the
+ * oldest sample.
+ */
+ start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;
+
+ /* Advance start to continue from the last reap time */
+ if (time_before(start, dql->last_reap + 1))
+ start = dql->last_reap + 1;
+
+ /* Newest sample we should have already seen a completion for */
+ end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);
+
+ /* Shrink the search space to [start, (now - start_thrs/2)] if
+ * `end` is beyond the stall zone
+ */
+ if (time_before(now, end + stall_thrs / 2))
+ end = now - stall_thrs / 2;
+
+ /* Search for the queued time in [t, end] */
+ for (t = start; time_before_eq(t, end); t++)
+ if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG),
+ dql->history))
+ break;
+
+ /* Variable t contains the time of the queue */
+ if (!time_before_eq(t, end))
+ goto no_stall;
+
+ /* The ring buffer was modified in the meantime, retry */
+ if (hist_head != READ_ONCE(dql->history_head))
+ goto dqs_again;
+
+ dql->stall_cnt++;
+ dql->stall_max = max_t(unsigned short, dql->stall_max, now - t);
+
+ trace_dql_stall_detected(dql->stall_thrs, now - t,
+ dql->last_reap, dql->history_head,
+ now, dql->history);
+ }
+no_stall:
+ dql->last_reap = now;
+}
+
/* Records completed count and recalculates the queue limit */
void dql_completed(struct dql *dql, unsigned int count)
{
@@ -110,6 +177,8 @@ void dql_completed(struct dql *dql, unsigned int count)
dql->prev_last_obj_cnt = dql->last_obj_cnt;
dql->num_completed = completed;
dql->prev_num_queued = num_queued;
+
+ dql_check_stall(dql);
}
EXPORT_SYMBOL(dql_completed);
@@ -125,6 +194,10 @@ void dql_reset(struct dql *dql)
dql->prev_ovlimit = 0;
dql->lowest_slack = UINT_MAX;
dql->slack_start_time = jiffies;
+
+ dql->last_reap = jiffies;
+ dql->history_head = jiffies / BITS_PER_LONG;
+ memset(dql->history, 0, sizeof(dql->history));
}
EXPORT_SYMBOL(dql_reset);
@@ -133,6 +206,7 @@ void dql_init(struct dql *dql, unsigned int hold_time)
dql->max_limit = DQL_MAX_LIMIT;
dql->min_limit = 0;
dql->slack_hold_time = hold_time;
+ dql->stall_thrs = 0;
dql_reset(dql);
}
EXPORT_SYMBOL(dql_init);
@@ -1414,6 +1414,65 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
+static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
+}
+
+static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
+ const char *buf, size_t len)
+{
+ struct dql *dql = &queue->dql;
+ unsigned int value;
+ int err;
+
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+
+ value = msecs_to_jiffies(value);
+ if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
+ return -ERANGE;
+
+ if (!dql->stall_thrs && value)
+ dql->last_reap = jiffies;
+ /* Force last_reap to be live */
+ smp_wmb();
+ dql->stall_thrs = value;
+
+ return len;
+}
+
+static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
+ __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
+
+static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+{
+ return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
+}
+
+static ssize_t bql_set_stall_max(struct netdev_queue *queue,
+ const char *buf, size_t len)
+{
+ WRITE_ONCE(queue->dql.stall_max, 0);
+ return len;
+}
+
+static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
+ __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
+
+static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%lu\n", dql->stall_cnt);
+}
+
+static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
+ __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
+
static ssize_t bql_show_inflight(struct netdev_queue *queue,
char *buf)
{
@@ -1452,6 +1511,9 @@ static struct attribute *dql_attrs[] __ro_after_init = {
&bql_limit_min_attribute.attr,
&bql_hold_time_attribute.attr,
&bql_inflight_attribute.attr,
+ &bql_stall_thrs_attribute.attr,
+ &bql_stall_cnt_attribute.attr,
+ &bql_stall_max_attribute.attr,
NULL
};