@@ -429,7 +429,13 @@ static void io_prep_async_work(struct io_kiocb *req)
}
if (req->flags & REQ_F_ISREG) {
- if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
+ bool should_hash = def->hash_reg_file;
+
+ /* don't serialize this request if the fs doesn't need it */
+ if (should_hash && (req->file->f_flags & O_DIRECT) &&
+ (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE))
+ should_hash = false;
+ if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
if (def->unbound_nonreg_file)
io_uring hashes writes to a given file/inode so that it can serialize them. This is useful if the file system needs exclusive access to the file to perform the write, as otherwise we end up with a ton of io-wq threads trying to lock the inode at the same time. This can cause excessive system time. But if the file system has flagged that it supports parallel O_DIRECT writes, then there's no need to serialize the writes. Check for that through FMODE_DIO_PARALLEL_WRITE and don't hash it if we don't need to. In a basic test of 8 threads writing to a file on XFS on a gen2 Optane, with each thread writing in 4k chunks, it improves performance from ~1350K IOPS (or ~5290MiB/sec) to ~1410K IOPS (or ~5500MiB/sec). Signed-off-by: Jens Axboe <axboe@kernel.dk> --- io_uring/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)