@@ -424,6 +424,13 @@ Description:
[RW] This file is used to control (on/off) the iostats
accounting of the disk.
+What: /sys/block/<disk>/queue/iostats_passthrough
+Date: October 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file is used to control (on/off) the iostats
+ accounting of the disk for passthrough commands.
+
What: /sys/block/<disk>/queue/logical_block_size
Date: May 2009
@@ -988,13 +988,43 @@ static inline void blk_account_io_done(struct request *req, u64 now)
}
}
+static inline bool blk_rq_passthrough_stats(struct request *req)
+{
+ struct bio *bio = req->bio;
+
+ if (!blk_queue_passthrough_stat(req->q))
+ return false;
+
+ /* Requests without a bio do not transfer data. */
+ if (!bio)
+ return false;
+
+ /*
+ * Stats are accumulated in the bdev, so must have one attached to a
+ * bio to track stats. Most drivers do not set the bdev for passthrough
+ * requests, but nvme is one that will set it.
+ */
+ if (!bio->bi_bdev)
+ return false;
+
+ /*
+ * We don't know what a passthrough command does, but we know the
+ * payload size and data direction. Ensuring the size is aligned to the
+ * block size filters out most commands with payloads that don't
+ * represent sector access.
+ */
+ if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1))
+ return false;
+ return true;
+}
+
static inline void blk_account_io_start(struct request *req)
{
trace_block_io_start(req);
if (!blk_queue_io_stat(req->q))
return;
- if (blk_rq_is_passthrough(req))
+ if (blk_rq_is_passthrough(req) && !blk_rq_passthrough_stats(req))
return;
req->rq_flags |= RQF_IO_STAT;
@@ -272,6 +272,34 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
return queue_var_show(disk_nr_zones(disk), page);
}
+static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
+{
+ return queue_var_show(blk_queue_passthrough_stat(disk->queue), page);
+}
+
+static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
+ const char *page, size_t count)
+{
+ struct queue_limits lim;
+ unsigned long ios;
+ ssize_t ret;
+
+ ret = queue_var_store(&ios, page, count);
+ if (ret < 0)
+ return ret;
+
+ lim = queue_limits_start_update(disk->queue);
+ if (ios)
+ lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
+ else
+ lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
+
+ ret = queue_limits_commit_update(disk->queue, &lim);
+ if (ret)
+ return ret;
+
+ return count;
+}
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
@@ -460,6 +488,7 @@ QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
+QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
@@ -586,6 +615,7 @@ static struct attribute *queue_attrs[] = {
&queue_max_open_zones_entry.attr,
&queue_max_active_zones_entry.attr,
&queue_nomerges_entry.attr,
+ &queue_iostats_passthrough_entry.attr,
&queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
&queue_add_random_entry.attr,
@@ -349,6 +349,9 @@ typedef unsigned int __bitwise blk_flags_t;
/* I/O topology is misaligned */
#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
+/* passthrough command IO accounting */
+#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2))
+
struct queue_limits {
blk_features_t features;
blk_flags_t flags;
@@ -617,6 +620,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_passthrough_stat(q) \
+ ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH)
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME