Message ID | 20190829155014.9229-6-tbogendoerfer@suse.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | ioc3-eth improvements | expand |
On Thu, 29 Aug 2019 17:50:03 +0200, Thomas Bogendoerfer wrote: > Memory for descriptor rings are allocated/freed, when interface is > brought up/down. Since the size of the rings is not changeable by > hardware, we now allocate rings now during probe and free it, when > device is removed. > > Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de> > --- > drivers/net/ethernet/sgi/ioc3-eth.c | 103 ++++++++++++++++++------------------ > 1 file changed, 51 insertions(+), 52 deletions(-) > > diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c > index ba18a53fbbe6..d9d94a55ac34 100644 > --- a/drivers/net/ethernet/sgi/ioc3-eth.c > +++ b/drivers/net/ethernet/sgi/ioc3-eth.c > @@ -803,25 +803,17 @@ static void ioc3_free_rings(struct ioc3_private *ip) > struct sk_buff *skb; > int rx_entry, n_entry; > > - if (ip->txr) { > - ioc3_clean_tx_ring(ip); > - free_pages((unsigned long)ip->txr, 2); > - ip->txr = NULL; > - } > + ioc3_clean_tx_ring(ip); > > - if (ip->rxr) { > - n_entry = ip->rx_ci; > - rx_entry = ip->rx_pi; > + n_entry = ip->rx_ci; > + rx_entry = ip->rx_pi; > > - while (n_entry != rx_entry) { > - skb = ip->rx_skbs[n_entry]; > - if (skb) > - dev_kfree_skb_any(skb); > + while (n_entry != rx_entry) { > + skb = ip->rx_skbs[n_entry]; > + if (skb) > + dev_kfree_skb_any(skb); I think dev_kfree_skb_any() accepts NULL > > - n_entry = (n_entry + 1) & RX_RING_MASK; > - } > - free_page((unsigned long)ip->rxr); > - ip->rxr = NULL; > + n_entry = (n_entry + 1) & RX_RING_MASK; > } > } > > @@ -829,49 +821,34 @@ static void ioc3_alloc_rings(struct net_device *dev) > { > struct ioc3_private *ip = netdev_priv(dev); > struct ioc3_erxbuf *rxb; > - unsigned long *rxr; > int i; > > - if (!ip->rxr) { > - /* Allocate and initialize rx ring. 4kb = 512 entries */ > - ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); > - rxr = ip->rxr; > - if (!rxr) > - pr_err("%s: get_zeroed_page() failed!\n", __func__); > - > - /* Now the rx buffers. 
The RX ring may be larger but > - * we only allocate 16 buffers for now. Need to tune > - * this for performance and memory later. > - */ > - for (i = 0; i < RX_BUFFS; i++) { > - struct sk_buff *skb; > + /* Now the rx buffers. The RX ring may be larger but > + * we only allocate 16 buffers for now. Need to tune > + * this for performance and memory later. > + */ > + for (i = 0; i < RX_BUFFS; i++) { > + struct sk_buff *skb; > > - skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); > - if (!skb) { > - show_free_areas(0, NULL); > - continue; > - } > + skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); > + if (!skb) { > + show_free_areas(0, NULL); > + continue; > + } > > - ip->rx_skbs[i] = skb; > + ip->rx_skbs[i] = skb; > > - /* Because we reserve afterwards. */ > - skb_put(skb, (1664 + RX_OFFSET)); > - rxb = (struct ioc3_erxbuf *)skb->data; > - rxr[i] = cpu_to_be64(ioc3_map(rxb, 1)); > - skb_reserve(skb, RX_OFFSET); > - } > - ip->rx_ci = 0; > - ip->rx_pi = RX_BUFFS; > + /* Because we reserve afterwards. */ > + skb_put(skb, (1664 + RX_OFFSET)); > + rxb = (struct ioc3_erxbuf *)skb->data; > + ip->rxr[i] = cpu_to_be64(ioc3_map(rxb, 1)); > + skb_reserve(skb, RX_OFFSET); > } > + ip->rx_ci = 0; > + ip->rx_pi = RX_BUFFS; > > - if (!ip->txr) { > - /* Allocate and initialize tx rings. 16kb = 128 bufs. */ > - ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); > - if (!ip->txr) > - pr_err("%s: __get_free_pages() failed!\n", __func__); > - ip->tx_pi = 0; > - ip->tx_ci = 0; > - } > + ip->tx_pi = 0; > + ip->tx_ci = 0; > } > > static void ioc3_init_rings(struct net_device *dev) > @@ -1239,6 +1216,23 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > timer_setup(&ip->ioc3_timer, ioc3_timer, 0); > > ioc3_stop(ip); > + > + /* Allocate and rx ring. 
4kb = 512 entries */ > + ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); > + if (!ip->rxr) { > + pr_err("ioc3-eth: rx ring allocation failed\n"); > + err = -ENOMEM; > + goto out_stop; > + } > + > + /* Allocate tx rings. 16kb = 128 bufs. */ > + ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); > + if (!ip->txr) { > + pr_err("ioc3-eth: tx ring allocation failed\n"); > + err = -ENOMEM; > + goto out_stop; > + } Please just use kcalloc()/kmalloc_array() here, and make sure the flags are set to GFP_KERNEL whenever possible. Here and in ioc3_alloc_rings() it looks like GFP_ATOMIC is unnecessary.
On Thu, 29 Aug 2019 14:05:37 -0700 Jakub Kicinski <jakub.kicinski@netronome.com> wrote: > On Thu, 29 Aug 2019 17:50:03 +0200, Thomas Bogendoerfer wrote: > > + if (skb) > > + dev_kfree_skb_any(skb); > > I think dev_kfree_skb_any() accepts NULL yes, I'll drop the if > > + > > + /* Allocate and rx ring. 4kb = 512 entries */ > > + ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); > > + if (!ip->rxr) { > > + pr_err("ioc3-eth: rx ring allocation failed\n"); > > + err = -ENOMEM; > > + goto out_stop; > > + } > > + > > + /* Allocate tx rings. 16kb = 128 bufs. */ > > + ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); > > + if (!ip->txr) { > > + pr_err("ioc3-eth: tx ring allocation failed\n"); > > + err = -ENOMEM; > > + goto out_stop; > > + } > > Please just use kcalloc()/kmalloc_array() here, both allocation will be replaced in patch 11 with dma_direct_alloc_pages. So I hope I don't need to change it here. Out of curiosity does kcalloc/kmalloc_array give me the same guarantees about alignment ? rx ring needs to be 4KB aligned, tx ring 16KB aligned. >, and make sure the flags > are set to GFP_KERNEL whenever possible. Here and in ioc3_alloc_rings() > it looks like GFP_ATOMIC is unnecessary. yes, I'll change it Thomas.
On Fri, 30 Aug 2019 00:00:58 +0200, Thomas Bogendoerfer wrote: > On Thu, 29 Aug 2019 14:05:37 -0700 > Jakub Kicinski <jakub.kicinski@netronome.com> wrote: > > > On Thu, 29 Aug 2019 17:50:03 +0200, Thomas Bogendoerfer wrote: > > > + if (skb) > > > + dev_kfree_skb_any(skb); > > > > I think dev_kfree_skb_any() accepts NULL > > yes, I'll drop the if > > > > + > > > + /* Allocate and rx ring. 4kb = 512 entries */ > > > + ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); > > > + if (!ip->rxr) { > > > + pr_err("ioc3-eth: rx ring allocation failed\n"); > > > + err = -ENOMEM; > > > + goto out_stop; > > > + } > > > + > > > + /* Allocate tx rings. 16kb = 128 bufs. */ > > > + ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); > > > + if (!ip->txr) { > > > + pr_err("ioc3-eth: tx ring allocation failed\n"); > > > + err = -ENOMEM; > > > + goto out_stop; > > > + } > > > > Please just use kcalloc()/kmalloc_array() here, > > both allocation will be replaced in patch 11 with dma_direct_alloc_pages. > So I hope I don't need to change it here. Ah, missed that! > Out of curiosity does kcalloc/kmalloc_array give me the same guarantees about > alignment ? rx ring needs to be 4KB aligned, tx ring 16KB aligned. I don't think so, actually, I was mostly worried you are passing address from get_page() into kfree() here ;) But patch 11 cures that, so that's good, too. > >, and make sure the flags > > are set to GFP_KERNEL whenever possible. Here and in ioc3_alloc_rings() > > it looks like GFP_ATOMIC is unnecessary. > > yes, I'll change it
On Thu, 29 Aug 2019 15:05:04 -0700 Jakub Kicinski <jakub.kicinski@netronome.com> wrote: > On Fri, 30 Aug 2019 00:00:58 +0200, Thomas Bogendoerfer wrote: > > Out of curiosity does kcalloc/kmalloc_array give me the same guarantees about > > alignment ? rx ring needs to be 4KB aligned, tx ring 16KB aligned. > > I don't think so, actually, I was mostly worried you are passing > address from get_page() into kfree() here ;) But patch 11 cures that, > so that's good, too. I realized that after sending my last mail. I'll fix that in v3 even it's just a transient bug. Thomas.
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index ba18a53fbbe6..d9d94a55ac34 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -803,25 +803,17 @@ static void ioc3_free_rings(struct ioc3_private *ip) struct sk_buff *skb; int rx_entry, n_entry; - if (ip->txr) { - ioc3_clean_tx_ring(ip); - free_pages((unsigned long)ip->txr, 2); - ip->txr = NULL; - } + ioc3_clean_tx_ring(ip); - if (ip->rxr) { - n_entry = ip->rx_ci; - rx_entry = ip->rx_pi; + n_entry = ip->rx_ci; + rx_entry = ip->rx_pi; - while (n_entry != rx_entry) { - skb = ip->rx_skbs[n_entry]; - if (skb) - dev_kfree_skb_any(skb); + while (n_entry != rx_entry) { + skb = ip->rx_skbs[n_entry]; + if (skb) + dev_kfree_skb_any(skb); - n_entry = (n_entry + 1) & RX_RING_MASK; - } - free_page((unsigned long)ip->rxr); - ip->rxr = NULL; + n_entry = (n_entry + 1) & RX_RING_MASK; } } @@ -829,49 +821,34 @@ static void ioc3_alloc_rings(struct net_device *dev) { struct ioc3_private *ip = netdev_priv(dev); struct ioc3_erxbuf *rxb; - unsigned long *rxr; int i; - if (!ip->rxr) { - /* Allocate and initialize rx ring. 4kb = 512 entries */ - ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); - rxr = ip->rxr; - if (!rxr) - pr_err("%s: get_zeroed_page() failed!\n", __func__); - - /* Now the rx buffers. The RX ring may be larger but - * we only allocate 16 buffers for now. Need to tune - * this for performance and memory later. - */ - for (i = 0; i < RX_BUFFS; i++) { - struct sk_buff *skb; + /* Now the rx buffers. The RX ring may be larger but + * we only allocate 16 buffers for now. Need to tune + * this for performance and memory later. 
+ */ + for (i = 0; i < RX_BUFFS; i++) { + struct sk_buff *skb; - skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); - if (!skb) { - show_free_areas(0, NULL); - continue; - } + skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC); + if (!skb) { + show_free_areas(0, NULL); + continue; + } - ip->rx_skbs[i] = skb; + ip->rx_skbs[i] = skb; - /* Because we reserve afterwards. */ - skb_put(skb, (1664 + RX_OFFSET)); - rxb = (struct ioc3_erxbuf *)skb->data; - rxr[i] = cpu_to_be64(ioc3_map(rxb, 1)); - skb_reserve(skb, RX_OFFSET); - } - ip->rx_ci = 0; - ip->rx_pi = RX_BUFFS; + /* Because we reserve afterwards. */ + skb_put(skb, (1664 + RX_OFFSET)); + rxb = (struct ioc3_erxbuf *)skb->data; + ip->rxr[i] = cpu_to_be64(ioc3_map(rxb, 1)); + skb_reserve(skb, RX_OFFSET); } + ip->rx_ci = 0; + ip->rx_pi = RX_BUFFS; - if (!ip->txr) { - /* Allocate and initialize tx rings. 16kb = 128 bufs. */ - ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); - if (!ip->txr) - pr_err("%s: __get_free_pages() failed!\n", __func__); - ip->tx_pi = 0; - ip->tx_ci = 0; - } + ip->tx_pi = 0; + ip->tx_ci = 0; } static void ioc3_init_rings(struct net_device *dev) @@ -1239,6 +1216,23 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) timer_setup(&ip->ioc3_timer, ioc3_timer, 0); ioc3_stop(ip); + + /* Allocate and rx ring. 4kb = 512 entries */ + ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC); + if (!ip->rxr) { + pr_err("ioc3-eth: rx ring allocation failed\n"); + err = -ENOMEM; + goto out_stop; + } + + /* Allocate tx rings. 16kb = 128 bufs. 
*/ + ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2); + if (!ip->txr) { + pr_err("ioc3-eth: tx ring allocation failed\n"); + err = -ENOMEM; + goto out_stop; + } + ioc3_init(dev); ip->pdev = pdev; @@ -1293,6 +1287,8 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ioc3_stop(ip); del_timer_sync(&ip->ioc3_timer); ioc3_free_rings(ip); + kfree(ip->rxr); + kfree(ip->txr); out_res: pci_release_regions(pdev); out_free: @@ -1310,6 +1306,9 @@ static void ioc3_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct ioc3_private *ip = netdev_priv(dev); + kfree(ip->rxr); + kfree(ip->txr); + unregister_netdev(dev); del_timer_sync(&ip->ioc3_timer);
Memory for the descriptor rings is allocated/freed when the interface is brought up/down. Since the size of the rings is not changeable by hardware, we now allocate the rings during probe and free them when the device is removed. Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de> --- drivers/net/ethernet/sgi/ioc3-eth.c | 103 ++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 52 deletions(-)