diff mbox

[1/2] add mt7601u driver

Message ID 20150505124414.3c733633@north (mailing list archive)
State Not Applicable
Delegated to: Kalle Valo
Headers show

Commit Message

Jakub Kici?ski May 5, 2015, 10:44 a.m. UTC
On Tue, 05 May 2015 08:45:42 +0200, Johannes Berg wrote:
> On Tue, 2015-05-05 at 02:49 +0200, Jakub Kici?ski wrote:
> > On Mon, 04 May 2015 12:15:11 +0200, Johannes Berg wrote:
> > > On Mon, 2015-05-04 at 12:04 +0200, Jakub Kici?ski wrote:
> > > 
> > > > > Don't know how your buffers are set up, but if the DMA engine consumes
> > > > > pages you could consider using paged RX instead of the memcpy().
> > > > 
> > > > DMA engine can concatenate multiple frames into a single USB bulk
> > > > transfer to a large continuous buffer.  There is no way to request 
> > > > any alignment of the frames within that large buffer so I think paged 
> > > > RX is not an option.
> > > 
> > > You could probably still do it because the networking stack only
> > > requires the *headers* to be aligned. But if they headers aren't in the
> > > skb->data (skb->head) then they'll be pulled into that by the network
> > > stack, where they'll be aligned.
> > 
> > I *think* I got it.  Would you care to take a look?  I basically
> > allocate a compound page with dev_alloc_pages() and run get_page() for
> > every fragment.
> 
> I think it looks fine - except the compound part. I'm pretty sure you
> need to do references always with the pointer to the first page (and
> consequently don't need to split on the page boundary, and use the same
> page pointer just with a bigger offset)

I wasn't sure about the splitting and when I split and took the
reference on head page -> mapcount got corrupted.  But you are right,
no splitting and reference on head works fine.

> We have a comment on this code (from Eric) saying
> 
>         /* Dont use dev_alloc_skb(), we'll have enough headroom once   
>          * ieee80211_hdr pulled.
> 
> which probably applies here as well. We also just use 128 instead of 256
> and haven't seen a need for more.
> 
> We also pull the entire frame only if it fits into those 128 bytes, and
> we preload the 802.11 header + 8 bytes for snap and possibly crypto
> headroom so we can do it all at once instead of later in multiple
> places. You can check out iwl_mvm_pass_packet_to_mac80211() to see the
> details.

Done.

Thanks a lot for your comments, here is the improved version.  I
will give it some more testing and send v2 of the entire driver.

------------>8--------------------------------

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/dma.c b/dma.c
index e865cf8384a5..4510b3a1357b 100644
--- a/dma.c
+++ b/dma.c
@@ -16,6 +16,9 @@ 
 #include "usb.h"
 #include "trace.h"
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp);
+
 static unsigned int ieee80211_get_hdrlen_from_buf(const u8 *data, unsigned len)
 {
 	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)data;
@@ -34,6 +37,7 @@  mt7601u_rx_skb_from_seg(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 			u8 *data, u32 seg_len)
 {
 	struct sk_buff *skb;
+	u32 true_len;
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD))
 		seg_len -= 2;
@@ -52,15 +56,55 @@  mt7601u_rx_skb_from_seg(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 
 	memcpy(skb_put(skb, seg_len), data, seg_len);
 
+	true_len = mt76_mac_process_rx(dev, skb, skb->data, rxwi);
+	skb_trim(skb, true_len);
+
 	return skb;
 }
 
-static void
-mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
+static struct sk_buff *
+mt7601u_rx_skb_from_seg_paged(struct mt7601u_dev *dev,
+			      struct mt7601u_rxwi *rxwi, void *data,
+			      u32 seg_len, u32 truesize, struct page *p)
+{
+	unsigned int hdr_len = ieee80211_get_hdrlen_from_buf(data, seg_len);
+	unsigned int true_len, copy, frag;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(128, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	true_len = mt76_mac_process_rx(dev, skb, data, rxwi);
+
+	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD)) {
+		memcpy(skb_put(skb, hdr_len), data, hdr_len);
+		data += hdr_len + 2;
+		true_len -= hdr_len;
+		hdr_len = 0;
+	}
+
+	copy = (true_len <= skb_tailroom(skb)) ? true_len : hdr_len + 8;
+	frag = true_len - copy;
+
+	memcpy(skb_put(skb, copy), data, copy);
+	data += copy;
+
+	if (frag) {
+		skb_add_rx_frag(skb, 0, p, data - page_address(p),
+				frag, truesize);
+		get_page(p);
+	}
+
+	return skb;
+}
+
+static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data,
+				   u32 seg_len, struct page *p, bool paged)
 {
 	struct sk_buff *skb;
 	struct mt7601u_rxwi *rxwi;
-	u32 fce_info;
+	u32 fce_info, truesize = seg_len;
 
 	/* DMA_INFO field at the beginning of the segment contains only some of
 	 * the information, we need to read the FCE descriptor from the end.
@@ -82,15 +126,14 @@  mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
 
 	trace_mt_rx(dev, rxwi, fce_info);
 
-	skb = mt7601u_rx_skb_from_seg(dev, rxwi, data, seg_len);
+	if (paged)
+		skb = mt7601u_rx_skb_from_seg_paged(dev, rxwi, data, seg_len,
+						    truesize, p);
+	else
+		skb = mt7601u_rx_skb_from_seg(dev, rxwi, data, seg_len);
 	if (!skb)
 		return;
 
-	if (mt76_mac_process_rx(dev, skb, rxwi)) {
-		dev_kfree_skb(skb);
-		return;
-	}
-
 	ieee80211_rx_ni(dev->hw, skb);
 }
 
@@ -110,17 +153,28 @@  static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len)
 }
 
 static void
-mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
+mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e)
 {
 	u32 seg_len, data_len = e->urb->actual_length;
-	u8 *data = e->buf;
+	u8 *data = page_address(e->p);
+	struct page *new_p = NULL;
+	bool paged = true;
 	int cnt = 0;
 
 	if (!test_bit(MT7601U_STATE_INITIALIZED, &dev->state))
 		return;
 
+	/* Copy if there is very little data in the buffer. */
+	if (data_len < 512) {
+		paged = false;
+	} else {
+		new_p = dev_alloc_pages(MT_RX_ORDER);
+		if (!new_p)
+			paged = false;
+	}
+
 	while ((seg_len = mt7601u_rx_next_seg_len(data, data_len))) {
-		mt7601u_rx_process_seg(dev, data, seg_len);
+		mt7601u_rx_process_seg(dev, data, seg_len, e->p, paged);
 
 		data_len -= seg_len;
 		data += seg_len;
@@ -128,14 +182,21 @@  mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
 	}
 
 	if (cnt > 1)
-		trace_mt_rx_dma_aggr(dev, cnt);
+		trace_mt_rx_dma_aggr(dev, cnt, paged);
+
+	if (paged) {
+		/* we have one extra ref from the allocator */
+		__free_pages(e->p, MT_RX_ORDER);
+
+		e->p = new_p;
+	}
 }
 
-static struct mt7601u_dma_buf *
+static struct mt7601u_dma_buf_rx *
 mt7601u_rx_get_pending_entry(struct mt7601u_dev *dev)
 {
 	struct mt7601u_rx_queue *q = &dev->rx_q;
-	struct mt7601u_dma_buf *buf = NULL;
+	struct mt7601u_dma_buf_rx *buf = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->rx_lock, flags);
@@ -175,15 +236,14 @@  out:
 static void mt7601u_rx_tasklet(unsigned long data)
 {
 	struct mt7601u_dev *dev = (struct mt7601u_dev *) data;
-	struct mt7601u_dma_buf *e;
+	struct mt7601u_dma_buf_rx *e;
 
 	while ((e = mt7601u_rx_get_pending_entry(dev))) {
 		if (e->urb->status)
 			continue;
 
 		mt7601u_rx_process_entry(dev, e);
-		mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX, e,
-				       GFP_ATOMIC, mt7601u_complete_rx, dev);
+		mt7601u_submit_rx_buf(dev, e, GFP_ATOMIC);
 	}
 }
 
@@ -325,14 +385,33 @@  static void mt7601u_kill_rx(struct mt7601u_dev *dev)
 	spin_unlock_irqrestore(&dev->rx_lock, flags);
 }
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp)
+{
+	struct usb_device *usb_dev = mt7601u_to_usb_dev(dev);
+	u8 *buf = page_address(e->p);
+	unsigned pipe;
+	int ret;
+
+	pipe = usb_rcvbulkpipe(usb_dev, dev->in_eps[MT_EP_IN_PKT_RX]);
+
+	usb_fill_bulk_urb(e->urb, usb_dev, pipe, buf, MT_RX_URB_SIZE,
+			  mt7601u_complete_rx, dev);
+
+	trace_mt_submit_urb(dev, e->urb);
+	ret = usb_submit_urb(e->urb, gfp);
+	if (ret)
+		dev_err(dev->dev, "Error: submit RX URB failed:%d\n", ret);
+
+	return ret;
+}
+
 static int mt7601u_submit_rx(struct mt7601u_dev *dev)
 {
 	int i, ret;
 
 	for (i = 0; i < dev->rx_q.entries; i++) {
-		ret = mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX,
-					     &dev->rx_q.e[i], GFP_KERNEL,
-					     mt7601u_complete_rx, dev);
+		ret = mt7601u_submit_rx_buf(dev, &dev->rx_q.e[i], GFP_KERNEL);
 		if (ret)
 			return ret;
 	}
@@ -344,8 +423,10 @@  static void mt7601u_free_rx(struct mt7601u_dev *dev)
 {
 	int i;
 
-	for (i = 0; i < dev->rx_q.entries; i++)
-		mt7601u_usb_free_buf(dev, &dev->rx_q.e[i]);
+	for (i = 0; i < dev->rx_q.entries; i++) {
+		__free_pages(dev->rx_q.e[i].p, MT_RX_ORDER);
+		usb_free_urb(dev->rx_q.e[i].urb);
+	}
 }
 
 static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
@@ -356,9 +437,13 @@  static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
 	dev->rx_q.dev = dev;
 	dev->rx_q.entries = N_RX_ENTRIES;
 
-	for (i = 0; i < N_RX_ENTRIES; i++)
-		if (mt7601u_usb_alloc_buf(dev, MT_RX_URB_SIZE, &dev->rx_q.e[i]))
+	for (i = 0; i < N_RX_ENTRIES; i++) {
+		dev->rx_q.e[i].urb = usb_alloc_urb(0, GFP_KERNEL);
+		dev->rx_q.e[i].p = dev_alloc_pages(MT_RX_ORDER);
+
+		if (!dev->rx_q.e[i].urb || !dev->rx_q.e[i].p)
 			return -ENOMEM;
+	}
 
 	return 0;
 }
diff --git a/mac.c b/mac.c
index 539751362b41..8802366f7181 100644
--- a/mac.c
+++ b/mac.c
@@ -441,30 +441,28 @@  mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 }
 
 static int
-mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, struct sk_buff *skb)
+mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, u8 *data)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data;
 
 	return ieee80211_is_beacon(hdr->frame_control) &&
 		ether_addr_equal(hdr->addr2, dev->ap_bssid);
 }
 
-int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi)
 {
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct mt7601u_rxwi *rxwi = rxi;
 	u32 ctl = le32_to_cpu(rxwi->ctl);
 	u16 rate = le16_to_cpu(rxwi->rate);
-	int len, rssi;
+	int rssi;
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
 		status->flag |= RX_FLAG_DECRYPTED;
 		status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
 	}
 
-	len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
-	skb_trim(skb, len);
-
 	status->chains = BIT(0);
 	rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
 	status->chain_signal[0] = status->signal = rssi;
@@ -474,13 +472,13 @@  int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
 	mt76_mac_process_rate(status, rate);
 
 	spin_lock_bh(&dev->con_mon_lock);
-	if (mt7601u_rx_is_our_beacon(dev, skb))
+	if (mt7601u_rx_is_our_beacon(dev, data))
 		mt7601u_rx_monitor_beacon(dev, rxwi, rate, rssi);
 	else if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_U2M))
 		dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8);
 	spin_unlock_bh(&dev->con_mon_lock);
 
-	return 0;
+	return MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
 }
 
 static enum mt76_cipher_type
diff --git a/mac.h b/mac.h
index 90cf147f37e7..2c22d63c63a2 100644
--- a/mac.h
+++ b/mac.h
@@ -160,8 +160,8 @@  struct mt76_txwi {
 #define MT_TXWI_CTL_CHAN_CHECK_PKT	BIT(4)
 #define MT_TXWI_CTL_PIFS_REV		BIT(6)
 
-int
-mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxwi);
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi);
 int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx,
 			  struct ieee80211_key_conf *key);
 void mt76_mac_wcid_set_rate(struct mt7601u_dev *dev, struct mt76_wcid *wcid,
diff --git a/mt7601u.h b/mt7601u.h
index f9957350a187..d6f83e03cb58 100644
--- a/mt7601u.h
+++ b/mt7601u.h
@@ -37,6 +37,7 @@ 
 #define MT_USB_AGGR_SIZE_LIMIT		21 /* * 1024B */
 #define MT_USB_AGGR_TIMEOUT		0x80 /* * 33ns */
 #define MT_RX_URB_SIZE			(24 * 1024)
+#define MT_RX_ORDER			3
 
 struct mt7601u_dma_buf {
 	struct urb *urb;
@@ -73,7 +74,10 @@  struct mac_stats {
 struct mt7601u_rx_queue {
 	struct mt7601u_dev *dev;
 
-	struct mt7601u_dma_buf e[N_RX_ENTRIES];
+	struct mt7601u_dma_buf_rx {
+		struct urb *urb;
+		struct page *p;
+	} e[N_RX_ENTRIES];
 
 	unsigned int start;
 	unsigned int end;
diff --git a/trace.h b/trace.h
index db86272401ff..289897300ef0 100644
--- a/trace.h
+++ b/trace.h
@@ -352,17 +352,20 @@  TRACE_EVENT(mt_tx_status,
 );
 
 TRACE_EVENT(mt_rx_dma_aggr,
-	TP_PROTO(struct mt7601u_dev *dev, int cnt),
-	TP_ARGS(dev, cnt),
+	TP_PROTO(struct mt7601u_dev *dev, int cnt, bool paged),
+	TP_ARGS(dev, cnt, paged),
 	TP_STRUCT__entry(
 		DEV_ENTRY
 		__field(u8, cnt)
+		__field(bool, paged)
 	),
 	TP_fast_assign(
 		DEV_ASSIGN;
 		__entry->cnt = cnt;
+		__entry->paged = paged;
 	),
-	TP_printk(DEV_PR_FMT "%d", DEV_PR_ARG, __entry->cnt)
+	TP_printk(DEV_PR_FMT "cnt:%d paged:%d",
+		  DEV_PR_ARG, __entry->cnt, __entry->paged)
 );
 
 DEFINE_EVENT(dev_simple_evt, set_key,