From patchwork Fri May 21 09:30:24 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Xin, Xiaohui" X-Patchwork-Id: 101369 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o4L9T2TB010426 for ; Fri, 21 May 2010 09:29:04 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758171Ab0EUJ1l (ORCPT ); Fri, 21 May 2010 05:27:41 -0400 Received: from mga01.intel.com ([192.55.52.88]:62553 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754897Ab0EUJXx (ORCPT ); Fri, 21 May 2010 05:23:53 -0400 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga101.fm.intel.com with ESMTP; 21 May 2010 02:20:45 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.53,277,1272870000"; d="scan'208";a="569306356" Received: from unknown (HELO localhost.localdomain) ([10.239.36.106]) by fmsmga002.fm.intel.com with ESMTP; 21 May 2010 02:22:56 -0700 From: xiaohui.xin@intel.com To: netdev@vger.kernel.org, kvm@vger.kernel.org, linux-kernel@vger.kernel.org, mst@redhat.com, mingo@elte.hu, davem@davemloft.net, jdike@linux.intel.com Cc: Xin Xiaohui Subject: [RFC][PATCH v6 08/19] Make __alloc_skb() to get external buffer. 
Date: Fri, 21 May 2010 17:30:24 +0800 Message-Id: <1274434235-5929-9-git-send-email-xiaohui.xin@intel.com> X-Mailer: git-send-email 1.5.4.4 In-Reply-To: <1274434235-5929-8-git-send-email-xiaohui.xin@intel.com> References: <1274434235-5929-1-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-2-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-3-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-4-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-5-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-6-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-7-git-send-email-xiaohui.xin@intel.com> <1274434235-5929-8-git-send-email-xiaohui.xin@intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Fri, 21 May 2010 09:29:04 +0000 (UTC) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 281a1c0..5ff8c27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -442,17 +442,18 @@ extern void kfree_skb(struct sk_buff *skb); extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone, int node); + gfp_t priority, int fclone, + int node, struct net_device *dev); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0, -1); + return __alloc_skb(size, priority, 0, -1, NULL); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1, -1); + return __alloc_skb(size, priority, 1, -1, NULL); } extern int skb_recycle_check(struct sk_buff *skb, int skb_size); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fbdb1f1..38d19d0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -161,7 +161,8 @@ 
EXPORT_SYMBOL(skb_under_panic); * @fclone: allocate from fclone cache instead of head cache * and allocate a cloned (child) skb * @node: numa node to allocate memory on - * + * @dev: the device that owns the skb if the skb tries to get an external buffer, + * otherwise NULL. * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. * The return is the buffer. On a failure the return is %NULL. @@ -170,12 +171,13 @@ EXPORT_SYMBOL(skb_under_panic); * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) + int fclone, int node, struct net_device *dev) { struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; - u8 *data; + u8 *data = NULL; + struct skb_external_page *ext_page = NULL; cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; @@ -185,8 +187,23 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto out; size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); + + /* If the device wants to do mediate passthru (zero-copy), + * the skb may try to get external buffers from outside. + * If that fails, fall back to allocating buffers from the kernel. + */ + if (dev && dev->mp_port) { + ext_page = netdev_alloc_external_page(dev, skb, size); + if (ext_page) { + data = ext_page->start; + size = ext_page->size; + } + } + + if (!data) + data = kmalloc_node_track_caller( + size + sizeof(struct skb_shared_info), + gfp_mask, node); if (!data) goto nodata; @@ -208,6 +225,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->mac_header = ~0U; #endif + /* If the skb gets external buffers successfully, since the shinfo is + * at the end of the buffer, we may retain the shinfo once we + * need it sometime. 
+ */ + if (ext_page) { + skb->head = skb->data - NET_IP_ALIGN - NET_SKB_PAD; + memcpy(ext_page->ushinfo, skb_shinfo(skb), + sizeof(struct skb_shared_info)); + } /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); @@ -231,6 +257,11 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, child->fclone = SKB_FCLONE_UNAVAILABLE; } + /* Record the external buffer info in this field. It's not so good, + * but we cannot find another place easily. + */ + shinfo->destructor_arg = ext_page; + out: return skb; nodata: @@ -259,7 +290,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; struct sk_buff *skb; - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node, dev); if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev;