Message ID | 20240103095650.25769-5-linyunsheng@huawei.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | remove page frag implementation in vhost_net | expand |
On Wed, 2024-01-03 at 17:56 +0800, Yunsheng Lin wrote: > The page frag in vhost_net_page_frag_refill() uses the > 'struct page_frag' from skb_page_frag_refill(), but its > implementation is similar to page_frag_alloc_align() now. > > This patch removes vhost_net_page_frag_refill() by using > 'struct page_frag_cache' instead of 'struct page_frag', > and allocating frag using page_frag_alloc_align(). > > The added benefit is not only unifying the page frag > implementation a little, but also a performance boost of > about 0.5% when testing with the vhost_net_test introduced in the > last patch. > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > Acked-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/net.c | 93 ++++++++++++++------------------------------- > 1 file changed, 29 insertions(+), 64 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index e574e21cc0ca..805e11d598e4 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -141,10 +141,8 @@ struct vhost_net { > unsigned tx_zcopy_err; > /* Flush in progress. Protected by tx vq lock. 
*/ > bool tx_flush; > - /* Private page frag */ > - struct page_frag page_frag; > - /* Refcount bias of page frag */ > - int refcnt_bias; > + /* Private page frag cache */ > + struct page_frag_cache pf_cache; > }; > > static unsigned vhost_net_zcopy_mask __read_mostly; > @@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) > !vhost_vq_avail_empty(vq->dev, vq); > } > > -static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, > - struct page_frag *pfrag, gfp_t gfp) > -{ > - if (pfrag->page) { > - if (pfrag->offset + sz <= pfrag->size) > - return true; > - __page_frag_cache_drain(pfrag->page, net->refcnt_bias); > - } > - > - pfrag->offset = 0; > - net->refcnt_bias = 0; > - if (SKB_FRAG_PAGE_ORDER) { > - /* Avoid direct reclaim but allow kswapd to wake */ > - pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | > - __GFP_COMP | __GFP_NOWARN | > - __GFP_NORETRY | __GFP_NOMEMALLOC, > - SKB_FRAG_PAGE_ORDER); > - if (likely(pfrag->page)) { > - pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; > - goto done; > - } > - } > - pfrag->page = alloc_page(gfp); > - if (likely(pfrag->page)) { > - pfrag->size = PAGE_SIZE; > - goto done; > - } > - return false; > - > -done: > - net->refcnt_bias = USHRT_MAX; > - page_ref_add(pfrag->page, USHRT_MAX - 1); > - return true; > -} > - > #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) > > static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, > @@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, > struct vhost_net *net = container_of(vq->dev, struct vhost_net, > dev); > struct socket *sock = vhost_vq_get_backend(vq); > - struct page_frag *alloc_frag = &net->page_frag; > struct virtio_net_hdr *gso; > struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; > struct tun_xdp_hdr *hdr; > @@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, > int sock_hlen = nvq->sock_hlen; > void *buf; > int copied; > + int 
ret; > > if (unlikely(len < nvq->sock_hlen)) > return -EFAULT; > @@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, > return -ENOSPC; > > buflen += SKB_DATA_ALIGN(len + pad); > - alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); > - if (unlikely(!vhost_net_page_frag_refill(net, buflen, > - alloc_frag, GFP_KERNEL))) > + buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, > + SMP_CACHE_BYTES); If your changes from patch 1 are just to make it fit into this layout might I suggest just splitting up page_frag_alloc_align into an inline that accepts the arguments you have here, and adding __page_frag_alloc_align which is passed the mask the original function expected. By doing that you should be able to maintain the same level of performance and still get most of the code cleanup. > + if (unlikely(!buf)) > return -ENOMEM; > > - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; > - copied = copy_page_from_iter(alloc_frag->page, > - alloc_frag->offset + > - offsetof(struct tun_xdp_hdr, gso), > - sock_hlen, from); > - if (copied != sock_hlen) > - return -EFAULT; > + copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), > + sock_hlen, from); > + if (copied != sock_hlen) { > + ret = -EFAULT; > + goto err; > + } > > hdr = buf; > gso = &hdr->gso; > @@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, > vhost16_to_cpu(vq, gso->csum_start) + > vhost16_to_cpu(vq, gso->csum_offset) + 2); > > - if (vhost16_to_cpu(vq, gso->hdr_len) > len) > - return -EINVAL; > + if (vhost16_to_cpu(vq, gso->hdr_len) > len) { > + ret = -EINVAL; > + goto err; > + } > } > > len -= sock_hlen; > - copied = copy_page_from_iter(alloc_frag->page, > - alloc_frag->offset + pad, > - len, from); > - if (copied != len) > - return -EFAULT; > + copied = copy_from_iter(buf + pad, len, from); > + if (copied != len) { > + ret = -EFAULT; > + goto err; > + } > > xdp_init_buff(xdp, buflen, NULL); 
> xdp_prepare_buff(xdp, buf, pad, len, true); > hdr->buflen = buflen; > > - --net->refcnt_bias; > - alloc_frag->offset += buflen; > - > ++nvq->batched_xdp; > > return 0; > + > +err: > + page_frag_free(buf); > + return ret; > } > > static void handle_tx_copy(struct vhost_net *net, struct socket *sock) > @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) > vqs[VHOST_NET_VQ_RX]); > > f->private_data = n; > - n->page_frag.page = NULL; > - n->refcnt_bias = 0; > + n->pf_cache.va = NULL; > > return 0; > } > @@ -1422,8 +1386,9 @@ static int vhost_net_release(struct inode *inode, struct file *f) > kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); > kfree(n->vqs[VHOST_NET_VQ_TX].xdp); > kfree(n->dev.vqs); > - if (n->page_frag.page) > - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); > + if (n->pf_cache.va) > + __page_frag_cache_drain(virt_to_head_page(n->pf_cache.va), > + n->pf_cache.pagecnt_bias); > kvfree(n); > return 0; > } I would recommend reordering this patch with patch 5. Then you could remove the block that is setting "n->pf_cache.va = NULL" above and just make use of page_frag_cache_drain in the lower block which would also return the va to NULL.
On 2024/1/6 0:06, Alexander H Duyck wrote: >> >> static void handle_tx_copy(struct vhost_net *net, struct socket *sock) >> @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) >> vqs[VHOST_NET_VQ_RX]); >> >> f->private_data = n; >> - n->page_frag.page = NULL; >> - n->refcnt_bias = 0; >> + n->pf_cache.va = NULL; >> >> return 0; >> } >> @@ -1422,8 +1386,9 @@ static int vhost_net_release(struct inode *inode, struct file *f) >> kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); >> kfree(n->vqs[VHOST_NET_VQ_TX].xdp); >> kfree(n->dev.vqs); >> - if (n->page_frag.page) >> - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); >> + if (n->pf_cache.va) >> + __page_frag_cache_drain(virt_to_head_page(n->pf_cache.va), >> + n->pf_cache.pagecnt_bias); >> kvfree(n); >> return 0; >> } > > I would recommend reordering this patch with patch 5. Then you could > remove the block that is setting "n->pf_cache.va = NULL" above and just > make use of page_frag_cache_drain in the lower block which would also > return the va to NULL. I am not sure if we can, as 'struct vhost_net' is not zeroed in vhost_net_open(). If we don't have "n->pf_cache.va = NULL", won't we be using uninitialized data when calling page_frag_alloc_align() for the first time? > . >
On Mon, Jan 8, 2024 at 1:06 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2024/1/6 0:06, Alexander H Duyck wrote: > >> > >> static void handle_tx_copy(struct vhost_net *net, struct socket *sock) > >> @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) > >> vqs[VHOST_NET_VQ_RX]); > >> > >> f->private_data = n; > >> - n->page_frag.page = NULL; > >> - n->refcnt_bias = 0; > >> + n->pf_cache.va = NULL; > >> > >> return 0; > >> } > >> @@ -1422,8 +1386,9 @@ static int vhost_net_release(struct inode *inode, struct file *f) > >> kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); > >> kfree(n->vqs[VHOST_NET_VQ_TX].xdp); > >> kfree(n->dev.vqs); > >> - if (n->page_frag.page) > >> - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); > >> + if (n->pf_cache.va) > >> + __page_frag_cache_drain(virt_to_head_page(n->pf_cache.va), > >> + n->pf_cache.pagecnt_bias); > >> kvfree(n); > >> return 0; > >> } > > > > I would recommend reordering this patch with patch 5. Then you could > > remove the block that is setting "n->pf_cache.va = NULL" above and just > > make use of page_frag_cache_drain in the lower block which would also > > return the va to NULL. > > I am not sure if we can as there is no zeroing for 'struct vhost_net' in > vhost_net_open(). > > If we don't have "n->pf_cache.va = NULL", don't we use the uninitialized data > when calling page_frag_alloc_align() for the first time? I see. So kvmalloc is used instead of kvzalloc when allocating the structure. That might be an opportunity to clean things up a bit: switching to kvzalloc would reduce the risk of some piece of memory initialization being missed. That said, I still think reordering the two patches might be useful, as it would keep the change you make to vhost_net encapsulated in one patch that fully enables the use of the new page pool API.
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e574e21cc0ca..805e11d598e4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,10 +141,8 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; - /* Private page frag */ - struct page_frag page_frag; - /* Refcount bias of page frag */ - int refcnt_bias; + /* Private page frag cache */ + struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } -static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, - struct page_frag *pfrag, gfp_t gfp) -{ - if (pfrag->page) { - if (pfrag->offset + sz <= pfrag->size) - return true; - __page_frag_cache_drain(pfrag->page, net->refcnt_bias); - } - - pfrag->offset = 0; - net->refcnt_bias = 0; - if (SKB_FRAG_PAGE_ORDER) { - /* Avoid direct reclaim but allow kswapd to wake */ - pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | - __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC, - SKB_FRAG_PAGE_ORDER); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; - goto done; - } - } - pfrag->page = alloc_page(gfp); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE; - goto done; - } - return false; - -done: - net->refcnt_bias = USHRT_MAX; - page_ref_add(pfrag->page, USHRT_MAX - 1); - return true; -} - #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, @@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); - struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -710,6 +672,7 @@ 
static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, int sock_hlen = nvq->sock_hlen; void *buf; int copied; + int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; @@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); - alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!vhost_net_page_frag_refill(net, buflen, - alloc_frag, GFP_KERNEL))) + buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, + SMP_CACHE_BYTES); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + - offsetof(struct tun_xdp_hdr, gso), - sock_hlen, from); - if (copied != sock_hlen) - return -EFAULT; + copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), + sock_hlen, from); + if (copied != sock_hlen) { + ret = -EFAULT; + goto err; + } hdr = buf; gso = &hdr->gso; @@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); - if (vhost16_to_cpu(vq, gso->hdr_len) > len) - return -EINVAL; + if (vhost16_to_cpu(vq, gso->hdr_len) > len) { + ret = -EINVAL; + goto err; + } } len -= sock_hlen; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + pad, - len, from); - if (copied != len) - return -EFAULT; + copied = copy_from_iter(buf + pad, len, from); + if (copied != len) { + ret = -EFAULT; + goto err; + } xdp_init_buff(xdp, buflen, NULL); xdp_prepare_buff(xdp, buf, pad, len, true); hdr->buflen = buflen; - --net->refcnt_bias; - alloc_frag->offset += buflen; - ++nvq->batched_xdp; return 0; + +err: + page_frag_free(buf); + return ret; } static void handle_tx_copy(struct vhost_net *net, struct socket *sock) @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX]); 
f->private_data = n; - n->page_frag.page = NULL; - n->refcnt_bias = 0; + n->pf_cache.va = NULL; return 0; } @@ -1422,8 +1386,9 @@ static int vhost_net_release(struct inode *inode, struct file *f) kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); - if (n->page_frag.page) - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); + if (n->pf_cache.va) + __page_frag_cache_drain(virt_to_head_page(n->pf_cache.va), + n->pf_cache.pagecnt_bias); kvfree(n); return 0; }