@@ -2125,7 +2125,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
/* clear the frag ref count which increased locally before */
for (i = 0; i < record->num_frags; i++) {
/* clear the frag ref count */
- __skb_frag_unref(&record->frags[i]);
+ __skb_frag_unref(&record->frags[i], false);
}
/* if any failure, come out from the loop. */
if (ret) {
@@ -2503,7 +2503,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
if (length == 0) {
/* don't need this page */
- __skb_frag_unref(frag);
+ __skb_frag_unref(frag, false);
--skb_shinfo(skb)->nr_frags;
} else {
size = min(length, (unsigned) PAGE_SIZE);
@@ -526,7 +526,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
fail:
while (nr > 0) {
nr--;
- __skb_frag_unref(skb_shinfo(skb)->frags + nr);
+ __skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
}
return 0;
}
@@ -40,6 +40,9 @@
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_common.h>
#endif
+#if IS_BUILTIN(CONFIG_PAGE_POOL)
+#include <net/page_pool.h>
+#endif
/* The interface for checksum offload between the stack and networking drivers
* is as follows...
@@ -247,6 +250,7 @@ struct napi_struct;
struct bpf_prog;
union bpf_attr;
struct skb_ext;
+struct xdp_mem_info;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
@@ -667,6 +671,8 @@ typedef unsigned char *sk_buff_data_t;
* @head_frag: skb was allocated from page fragments,
* not allocated by kmalloc() or vmalloc().
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
+ * @pp_recycle: mark the packet for recycling instead of freeing (implies
+ * page_pool support on driver)
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
@@ -791,10 +797,12 @@ struct sk_buff {
fclone:2,
peeked:1,
head_frag:1,
- pfmemalloc:1;
+ pfmemalloc:1,
+ pp_recycle:1; /* page_pool recycle indicator */
#ifdef CONFIG_SKB_EXTENSIONS
__u8 active_extensions;
#endif
+
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
@@ -3081,12 +3089,20 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
/**
* __skb_frag_unref - release a reference on a paged fragment.
* @frag: the paged fragment
+ * @recycle: recycle the page if allocated via page_pool
*
- * Releases a reference on the paged fragment @frag.
+ * Releases a reference on the paged fragment @frag
+ * or recycles the page via the page_pool API.
*/
-static inline void __skb_frag_unref(skb_frag_t *frag)
+static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
{
- put_page(skb_frag_page(frag));
+ struct page *page = skb_frag_page(frag);
+
+#if IS_BUILTIN(CONFIG_PAGE_POOL)
+ if (recycle && page_pool_return_skb_page(page_address(page)))
+ return;
+#endif
+ put_page(page);
}
/**
@@ -3098,7 +3114,7 @@ static inline void __skb_frag_unref(skb_frag_t *frag)
*/
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
- __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
+ __skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}
/**
@@ -4697,5 +4713,14 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
}
+#if IS_BUILTIN(CONFIG_PAGE_POOL)
+static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
+ struct xdp_mem_info *mem)
+{
+ skb->pp_recycle = 1;
+ page_pool_store_mem_info(page, mem);
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
@@ -65,6 +65,8 @@
#define PP_ALLOC_CACHE_REFILL 64
#define PP_SIGNATURE 0x20210303
+struct xdp_mem_info;
+
struct pp_alloc_cache {
u32 count;
void *cache[PP_ALLOC_CACHE_SIZE];
@@ -148,6 +150,8 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
+bool page_pool_return_skb_page(void *data);
+
struct page_pool *page_pool_create(const struct page_pool_params *params);
#ifdef CONFIG_PAGE_POOL
@@ -243,4 +247,13 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
spin_unlock_bh(&pool->ring.producer_lock);
}
+/* Store mem_info on struct page and use it while recycling skb frags */
+static inline
+void page_pool_store_mem_info(struct page *page, struct xdp_mem_info *mem)
+{
+ u32 *xmi = (u32 *)mem;
+
+ set_page_private(page, *xmi);
+}
+
#endif /* _NET_PAGE_POOL_H */
@@ -235,6 +235,7 @@ void xdp_return_buff(struct xdp_buff *xdp);
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
+void xdp_return_skb_frame(void *data, struct xdp_mem_info *mem);
/* When sending xdp_frame into the network stack, then there is no
* return point callback, which is needed to release e.g. DMA-mapping
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>
+#include <linux/skbuff.h>
#include <net/page_pool.h>
#include <net/xdp.h>
@@ -17,12 +18,19 @@
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
+#include <net/xdp.h>
#include <trace/events/page_pool.h>
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
+/* Used to store/retrieve hi/lo bytes from xdp_mem_info to page->private */
+union page_pool_xmi {
+ u32 raw;
+ struct xdp_mem_info mem_info;
+};
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -587,3 +595,38 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool page_pool_return_skb_page(void *data)
+{
+ struct xdp_mem_info mem_info;
+ union page_pool_xmi info;
+ struct page *page;
+
+ page = virt_to_head_page(data);
+ if (unlikely(page->signature != PP_SIGNATURE))
+ return false;
+
+ info.raw = page_private(page);
+ mem_info = info.mem_info;
+
+ /* If a buffer is marked for recycle and does not belong to
+ * MEM_TYPE_PAGE_POOL, the buffers will be eventually freed from the
+ * network stack and kfree_skb, but the DMA region will *not* be
+ * correctly unmapped. WARN here for the recycling misusage
+ */
+ if (unlikely(mem_info.type != MEM_TYPE_PAGE_POOL)) {
+ WARN_ONCE(true, "Tried to recycle non MEM_TYPE_PAGE_POOL");
+ return false;
+ }
+
+ /* Driver set this to memory recycling info. Reset it on recycle
+ * This will *not* work for NIC using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not
+ */
+ set_page_private(page, 0);
+ xdp_return_skb_frame(data, &mem_info);
+
+ return true;
+}
+EXPORT_SYMBOL(page_pool_return_skb_page);
@@ -69,6 +69,9 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#if IS_BUILTIN(CONFIG_PAGE_POOL)
+#include <net/page_pool.h>
+#endif
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -644,6 +647,11 @@ static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
+#if IS_BUILTIN(CONFIG_PAGE_POOL)
+ if (skb->pp_recycle && page_pool_return_skb_page(head))
+ return;
+#endif
+
if (skb->head_frag)
skb_free_frag(head);
else
@@ -663,7 +671,7 @@ static void skb_release_data(struct sk_buff *skb)
skb_zcopy_clear(skb, true);
for (i = 0; i < shinfo->nr_frags; i++)
- __skb_frag_unref(&shinfo->frags[i]);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
@@ -1045,6 +1053,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->nohdr = 0;
n->peeked = 0;
C(pfmemalloc);
+ C(pp_recycle);
n->destructor = NULL;
C(tail);
C(end);
@@ -3494,7 +3503,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom));
- __skb_frag_unref(fragfrom);
+ __skb_frag_unref(fragfrom, skb->pp_recycle);
}
/* Reposition in the original skb */
@@ -5275,6 +5284,13 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
+ /* We can't coalesce skb that are allocated from slab and page_pool
+ * The recycle mark is on the skb, so that might end up trying to
+ * recycle slab allocated skb->head
+ */
+ if (to->pp_recycle != from->pp_recycle)
+ return false;
+
if (len <= skb_tailroom(to)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
@@ -372,6 +372,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
}
}
+void xdp_return_skb_frame(void *data, struct xdp_mem_info *mem)
+{
+ __xdp_return(data, mem, false, NULL);
+}
+EXPORT_SYMBOL_GPL(xdp_return_skb_frame);
+
void xdp_return_frame(struct xdp_frame *xdpf)
{
__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
@@ -127,7 +127,7 @@ static void destroy_record(struct tls_record_info *record)
int i;
for (i = 0; i < record->num_frags; i++)
- __skb_frag_unref(&record->frags[i]);
+ __skb_frag_unref(&record->frags[i], false);
kfree(record);
}