Message ID | 20231208005250.2910004-8-almasrymina@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Device Memory TCP | expand |
On 12/7/23 5:52 PM, Mina Almasry wrote: > diff --git a/net/core/dev.c b/net/core/dev.c > index b8c8be5a912e..30667e4c3b95 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -2120,6 +2120,41 @@ static int netdev_restart_rx_queue(struct net_device *dev, int rxq_idx) > return err; > } > > +struct page_pool_iov *netdev_alloc_dmabuf(struct netdev_dmabuf_binding *binding) > +{ > + struct dmabuf_genpool_chunk_owner *owner; > + struct page_pool_iov *ppiov; > + unsigned long dma_addr; > + ssize_t offset; > + ssize_t index; > + > + dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE, Any reason not to allow allocation sizes other than PAGE_SIZE? e.g., 2048 for smaller MTUs or 8192 for larger ones. It can be a property of page_pool and constant across allocations vs allowing different size for each allocation.
On Fri, Dec 8, 2023 at 9:56 AM David Ahern <dsahern@kernel.org> wrote: > > On 12/7/23 5:52 PM, Mina Almasry wrote: > > diff --git a/net/core/dev.c b/net/core/dev.c > > index b8c8be5a912e..30667e4c3b95 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -2120,6 +2120,41 @@ static int netdev_restart_rx_queue(struct net_device *dev, int rxq_idx) > > return err; > > } > > > > +struct page_pool_iov *netdev_alloc_dmabuf(struct netdev_dmabuf_binding *binding) > > +{ > > + struct dmabuf_genpool_chunk_owner *owner; > > + struct page_pool_iov *ppiov; > > + unsigned long dma_addr; > > + ssize_t offset; > > + ssize_t index; > > + > > + dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE, > > Any reason not to allow allocation sizes other than PAGE_SIZE? e.g., > 2048 for smaller MTUs or 8192 for larger ones. It can be a property of > page_pool and constant across allocations vs allowing different size for > each allocation. Only for simplicity. Supporting non-PAGE_SIZE is certainly possible, but in my estimation it's a huge can of worms worthy of its own series. I find this series complicated to implement and review and support as-is, and if reasonable I would like to punt that as a future improvement. At the minimum, I think the needed changes are: 1. The memory provider needs to report to the page pool the alloc size. 2. The page_pool needs to handle non-PAGE_SIZE memory regions. 3. The drivers need to handle non-PAGE_SIZE memory regions. Drivers today handle fragged pages, but that is different because it's a PAGE_SIZE region that is fragged. This is a non-PAGE_SIZE region in the first place. 4. Any PAGE_SIZE assumptions in the entire net stack need to be removed. At Google we mostly use page aligned MTUs so we're likely not that interested in sub PAGE_SIZE allocations, but we are interested in n * PAGE_SIZE allocations, but, I hope, in a separate followup effort.
diff --git a/include/net/devmem.h b/include/net/devmem.h index 29ff125f9815..29bc337c7743 100644 --- a/include/net/devmem.h +++ b/include/net/devmem.h @@ -48,6 +48,9 @@ struct netdev_dmabuf_binding { }; #ifdef CONFIG_DMA_SHARED_BUFFER +struct page_pool_iov * +netdev_alloc_dmabuf(struct netdev_dmabuf_binding *binding); +void netdev_free_dmabuf(struct page_pool_iov *ppiov); void __netdev_dmabuf_binding_free(struct netdev_dmabuf_binding *binding); int netdev_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, struct netdev_dmabuf_binding **out); @@ -55,6 +58,16 @@ void netdev_unbind_dmabuf(struct netdev_dmabuf_binding *binding); int netdev_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct netdev_dmabuf_binding *binding); #else +static inline struct page_pool_iov * +netdev_alloc_dmabuf(struct netdev_dmabuf_binding *binding) +{ + return NULL; +} + +static inline void netdev_free_dmabuf(struct page_pool_iov *ppiov) +{ +} + static inline void __netdev_dmabuf_binding_free(struct netdev_dmabuf_binding *binding) { diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 7dc65774cde5..8bfc2d43efd4 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -79,6 +79,34 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) } #endif +/* page_pool_iov support */ + +static inline struct dmabuf_genpool_chunk_owner * +page_pool_iov_owner(const struct page_pool_iov *ppiov) +{ + return ppiov->owner; +} + +static inline unsigned int page_pool_iov_idx(const struct page_pool_iov *ppiov) +{ + return ppiov - page_pool_iov_owner(ppiov)->ppiovs; +} + +static inline dma_addr_t +page_pool_iov_dma_addr(const struct page_pool_iov *ppiov) +{ + struct dmabuf_genpool_chunk_owner *owner = page_pool_iov_owner(ppiov); + + return owner->base_dma_addr + + ((dma_addr_t)page_pool_iov_idx(ppiov) << PAGE_SHIFT); +} + +static inline struct netdev_dmabuf_binding * +page_pool_iov_binding(const struct page_pool_iov 
*ppiov) +{ + return page_pool_iov_owner(ppiov)->binding; +} + /** * page_pool_dev_alloc_pages() - allocate a page. * @pool: pool from which to allocate diff --git a/net/core/dev.c b/net/core/dev.c index b8c8be5a912e..30667e4c3b95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -155,8 +155,8 @@ #include <net/netdev_rx_queue.h> #include <linux/genalloc.h> #include <linux/dma-buf.h> -#include <net/page_pool/types.h> #include <net/devmem.h> +#include <net/page_pool/helpers.h> #include "dev.h" #include "net-sysfs.h" @@ -2120,6 +2120,41 @@ static int netdev_restart_rx_queue(struct net_device *dev, int rxq_idx) return err; } +struct page_pool_iov *netdev_alloc_dmabuf(struct netdev_dmabuf_binding *binding) +{ + struct dmabuf_genpool_chunk_owner *owner; + struct page_pool_iov *ppiov; + unsigned long dma_addr; + ssize_t offset; + ssize_t index; + + dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE, + (void **)&owner); + if (!dma_addr) + return NULL; + + offset = dma_addr - owner->base_dma_addr; + index = offset / PAGE_SIZE; + ppiov = &owner->ppiovs[index]; + + netdev_dmabuf_binding_get(binding); + + return ppiov; +} + +void netdev_free_dmabuf(struct page_pool_iov *ppiov) +{ + struct netdev_dmabuf_binding *binding = page_pool_iov_binding(ppiov); + unsigned long dma_addr = page_pool_iov_dma_addr(ppiov); + + refcount_set(&ppiov->refcount, 1); + + if (gen_pool_has_addr(binding->chunk_pool, dma_addr, PAGE_SIZE)) + gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE); + + netdev_dmabuf_binding_put(binding); +} + /* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(netdev_dmabuf_bindings, XA_FLAGS_ALLOC1);