[RFC,4/7] blk-mq: add scatterlist-less DMA mapping helpers

Message ID	383557d0fa1aa393dbab4e1daec94b6cced384ab.1730037261.git.leon@kernel.org (mailing list archive)
State	Handled Elsewhere
Headers	show Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1A68B17D358; Sun, 27 Oct 2024 14:23:01 +0000 (UTC) From: Leon Romanovsky <leon@kernel.org> To: Jens Axboe <axboe@kernel.dk>, Jason Gunthorpe <jgg@ziepe.ca>, Robin Murphy <robin.murphy@arm.com>, Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>, Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me> Cc: Keith Busch <kbusch@kernel.org>, Bjorn Helgaas <bhelgaas@google.com>, Logan Gunthorpe <logang@deltatee.com>, Yishai Hadas <yishaih@nvidia.com>, Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>, Kevin Tian <kevin.tian@intel.com>, Alex Williamson <alex.williamson@redhat.com>, Marek Szyprowski <m.szyprowski@samsung.com>, =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= <jglisse@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, Jonathan Corbet <corbet@lwn.net>, linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, linux-rdma@vger.kernel.org, iommu@lists.linux.dev, linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org, kvm@vger.kernel.org, linux-mm@kvack.org Subject: [RFC PATCH 4/7] blk-mq: add scatterlist-less DMA mapping helpers Date: Sun, 27 Oct 2024 16:21:57 +0200 Message-ID: <383557d0fa1aa393dbab4e1daec94b6cced384ab.1730037261.git.leon@kernel.org> In-Reply-To: <cover.1730037261.git.leon@kernel.org> References: <cover.1730037261.git.leon@kernel.org> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	Block and NMMe PCI use of new DMA mapping API \| expand [RFC,0/7] Block and NMMe PCI use of new DMA mapping API [RFC,1/7] block: share more code for bio addition helpers [RFC,2/7] block: don't merge different kinds of P2P transfers in a single bio [RFC,3/7] blk-mq: add a dma mapping iterator [RFC,4/7] blk-mq: add scatterlist-less DMA mapping helpers [RFC,5/7] nvme-pci: remove struct nvme_descriptor [RFC,6/7] nvme-pci: use a better encoding for small prp pool allocations [RFC,7/7] nvme-pci: convert to blk_rq_dma_map

diff --git a/block/blk-merge.c b/block/blk-merge.c index b63fd754a5de..77e5a3d208fc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -7,6 +7,8 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/blk-integrity.h> +#include <linux/blk-mq-dma.h> +#include <linux/dma-mapping.h> #include <linux/scatterlist.h> #include <linux/part_stat.h> #include <linux/blk-cgroup.h> @@ -515,6 +517,167 @@ static bool blk_map_iter_next(struct request *req, #define blk_phys_to_page(_paddr) \ (pfn_to_page(__phys_to_pfn(_paddr))) +/* + * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page + * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so + * we need to ensure our segments are aligned to this as well. + * + * Note that there is no point in using the slightly more complicated IOVA based + * path for single segment mappings. + */ +static inline bool blk_can_dma_map_iova(struct request *req, + struct device *dma_dev) +{ + return !((queue_virt_boundary(req->q) + 1) & + dma_get_merge_boundary(dma_dev)); +} + +static bool blk_dma_map_bus(struct request *req, struct device *dma_dev, + struct blk_dma_iter *iter, struct phys_vec *vec) +{ + iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr); + iter->len = vec->len; + return true; +} + +static bool blk_dma_map_direct(struct request *req, struct device *dma_dev, + struct blk_dma_iter *iter, struct phys_vec *vec) +{ + iter->addr = dma_map_page(dma_dev, blk_phys_to_page(vec->paddr), + offset_in_page(vec->paddr), vec->len, rq_dma_dir(req)); + if (dma_mapping_error(dma_dev, iter->addr)) { + iter->status = BLK_STS_RESOURCE; + return false; + } + iter->len = vec->len; + return true; +} + +static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter, + struct phys_vec *vec) +{ + enum dma_data_direction dir = rq_dma_dir(req); + unsigned int mapped = 0; + int error = 0; + + iter->addr = state->addr; + iter->len = dma_iova_size(state); + + do { + error = dma_iova_link(dma_dev, state, vec->paddr, mapped, + vec->len, dir, 0); + if (error) + break; + mapped += vec->len; + } while (blk_map_iter_next(req, &iter->iter, vec)); + + error = dma_iova_sync(dma_dev, state, 0, mapped, error); + if (error) { + iter->status = errno_to_blk_status(error); + return false; + } + + return true; +} + +/** + * blk_rq_dma_map_iter_start - map the first DMA segment for a request + * @req: request to map + * @dma_dev: device to map to + * @state: DMA IOVA state + * @iter: block layer DMA iterator + * + * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the + * caller and don't need to be initialized. @state needs to be stored for use + * at unmap time, @iter is only needed at map time. + * + * Returns %false if there is no segment to map, including due to an error, or + * %true it it did map a segment. + * + * If a segment was mapped, the DMA address for it is returned in @iter.addr and + * the length in @iter.len. If no segment was mapped the status code is + * returned in @iter.status. + * + * The caller can call blk_rq_dma_map_coalesce() to check if further segments + * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next() + * to try to map the following segments. + */ +bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter) +{ + unsigned int total_len = blk_rq_payload_bytes(req); + struct phys_vec vec; + + iter->iter.bio = req->bio; + iter->iter.iter = req->bio->bi_iter; + memset(&iter->p2pdma, 0, sizeof(iter->p2pdma)); + iter->status = BLK_STS_OK; + + /* + * Grab the first segment ASAP because we'll need it to check for P2P + * transfers. + */ + if (!blk_map_iter_next(req, &iter->iter, &vec)) + return false; + + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) { + switch (pci_p2pdma_state(&iter->p2pdma, dma_dev, + blk_phys_to_page(vec.paddr))) { + case PCI_P2PDMA_MAP_BUS_ADDR: + return blk_dma_map_bus(req, dma_dev, iter, &vec); + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * P2P transfers through the host bridge are treated the + * same as non-P2P transfers below and during unmap. + */ + req->cmd_flags &= ~REQ_P2PDMA; + break; + default: + iter->status = BLK_STS_INVAL; + return false; + } + } + + if (blk_can_dma_map_iova(req, dma_dev) && + dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len)) + return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec); + return blk_dma_map_direct(req, dma_dev, iter, &vec); +} +EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start); + +/** + * blk_rq_dma_map_iter_next - map the next DMA segment for a request + * @req: request to map + * @dma_dev: device to map to + * @state: DMA IOVA state + * @iter: block layer DMA iterator + * + * Iterate to the next mapping after a previous call to + * blk_rq_dma_map_iter_start(). See there for a detailed description of the + * arguments. + * + * Returns %false if there is no segment to map, including due to an error, or + * %true it it did map a segment. + * + * If a segment was mapped, the DMA address for it is returned in @iter.addr and + * the length in @iter.len. If no segment was mapped the status code is + * returned in @iter.status. + */ +bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter) +{ + struct phys_vec vec; + + if (!blk_map_iter_next(req, &iter->iter, &vec)) + return false; + + if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR) + return blk_dma_map_bus(req, dma_dev, iter, &vec); + return blk_dma_map_direct(req, dma_dev, iter, &vec); +} +EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next); + static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) { diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h new file mode 100644 index 000000000000..f1dae14e8203 --- /dev/null +++ b/include/linux/blk-mq-dma.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BLK_MQ_DMA_H +#define BLK_MQ_DMA_H + +#include <linux/blk-mq.h> +#include <linux/pci-p2pdma.h> + +struct blk_dma_iter { + /* Output address range for this iteration */ + dma_addr_t addr; + u32 len; + + /* Status code. Only valid when blk_rq_dma_map_iter_* returned false */ + blk_status_t status; + + /* Internal to blk_rq_dma_map_iter_* */ + struct req_iterator iter; + struct pci_p2pdma_map_state p2pdma; +}; + +bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter); +bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, struct blk_dma_iter *iter); + +/** + * blk_rq_dma_map_coalesce - were all segments coalesced? + * @state: DMA state to check + * + * Returns true if blk_rq_dma_map_iter_start coalesced all segments into a + * single DMA range. + */ +static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) +{ + return dma_use_iova(state); +} + +/** + * blk_rq_dma_map_coalesce - try to DMA unmap a request + * @req: request to unmap + * @dma_dev: device to unmap from + * @state: DMA IOVA state + * + * Returns %false if the callers needs to manually unmap every DMA segment + * mapped using @iter or %true if no work is left to be done. + */ +static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, + struct dma_iova_state *state) +{ + if (req->cmd_flags & REQ_P2PDMA) + return true; + + if (dma_use_iova(state)) { + dma_iova_destroy(dma_dev, state, rq_dma_dir(req), 0); + return true; + } + + if (!dma_need_unmap(dma_dev)) + return true; + + return false; +} + +#endif /* BLK_MQ_DMA_H */

[RFC,4/7] blk-mq: add scatterlist-less DMA mapping helpers

Commit Message

Patch