@@ -44,14 +44,28 @@
*/
#define TXGBE_RX_HDR_SIZE TXGBE_RXBUFFER_256
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define TXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+#define TXGBE_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
#define TXGBE_MAX_RX_DESC_POLL 10
+#ifndef MAX_SKB_FRAGS
+#define DESC_NEEDED 4
+#elif (MAX_SKB_FRAGS < 16)
+#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+#endif
+
/* wrapper around a pointer to a socket buffer,
* so a DMA handle can be stored along with the buffer
*/
struct txgbe_tx_buffer {
union txgbe_tx_desc *next_to_watch;
struct sk_buff *skb;
+ unsigned int bytecount;
DEFINE_DMA_UNMAP_ADDR(dma);
DEFINE_DMA_UNMAP_LEN(len);
};
@@ -61,6 +75,22 @@ struct txgbe_rx_buffer {
dma_addr_t dma;
dma_addr_t page_dma;
struct page *page;
+ unsigned int page_offset;
+};
+
+struct txgbe_queue_stats {
+ u64 packets;
+ u64 bytes;
+};
+
+struct txgbe_tx_queue_stats {
+ u64 restart_queue;
+};
+
+struct txgbe_rx_queue_stats {
+ u64 non_eop_descs;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
};
struct txgbe_ring {
@@ -85,6 +115,13 @@ struct txgbe_ring {
u16 next_to_clean;
u16 rx_buf_len;
u16 next_to_alloc;
+ struct txgbe_queue_stats stats;
+ struct u64_stats_sync syncp;
+
+ union {
+ struct txgbe_tx_queue_stats tx_stats;
+ struct txgbe_rx_queue_stats rx_stats;
+ };
} ____cacheline_internodealigned_in_smp;
#define TXGBE_MAX_FDIR_INDICES 63
@@ -112,8 +149,11 @@ static inline unsigned int txgbe_rx_pg_order(struct txgbe_ring __maybe_unused *r
struct txgbe_ring_container {
struct txgbe_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
u16 work_limit; /* total work allowed per interrupt */
u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
};
/* iterator for handling rings in ring container */
@@ -151,6 +191,27 @@ struct txgbe_q_vector {
#define TXGBE_16K_ITR 248
#define TXGBE_12K_ITR 336
+/* txgbe_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 txgbe_test_staterr(union txgbe_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* txgbe_desc_unused - calculate if we have unused descriptors */
+static inline u16 txgbe_desc_unused(struct txgbe_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#define TXGBE_RX_DESC(R, i) \
+ (&(((union txgbe_rx_desc *)((R)->desc))[i]))
+#define TXGBE_TX_DESC(R, i) \
+ (&(((union txgbe_tx_desc *)((R)->desc))[i]))
+
#define TXGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
#define TCP_TIMER_VECTOR 0
@@ -233,6 +294,7 @@ struct txgbe_adapter {
/* RX */
struct txgbe_ring *rx_ring[TXGBE_MAX_RX_QUEUES];
+
struct txgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
int num_q_vectors; /* current number of q_vectors for device */
@@ -329,6 +391,7 @@ void txgbe_set_interrupt_capability(struct txgbe_adapter *adapter);
void txgbe_clear_interrupt_scheme(struct txgbe_adapter *adapter);
void txgbe_unmap_and_free_tx_resource(struct txgbe_ring *ring,
struct txgbe_tx_buffer *tx_buffer);
+void txgbe_alloc_rx_buffers(struct txgbe_ring *rx_ring, u16 cleaned_count);
void txgbe_configure_port(struct txgbe_adapter *adapter);
void txgbe_set_rx_mode(struct net_device *netdev);
int txgbe_write_mc_addr_list(struct net_device *netdev);
@@ -204,6 +204,653 @@ void txgbe_unmap_and_free_tx_resource(struct txgbe_ring *ring,
/* tx_buffer must be completely set up in the transmit path */
}
+/**
+ * txgbe_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ **/
+static bool txgbe_clean_tx_irq(struct txgbe_q_vector *q_vector,
+ struct txgbe_ring *tx_ring)
+{
+ struct txgbe_adapter *adapter = q_vector->adapter;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int budget = q_vector->tx.work_limit;
+ unsigned int i = tx_ring->next_to_clean;
+ struct txgbe_tx_buffer *tx_buffer;
+ union txgbe_tx_desc *tx_desc;
+
+ if (test_bit(__TXGBE_DOWN, &adapter->state))
+ return true;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ tx_desc = TXGBE_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ union txgbe_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ smp_rmb();
+
+ /* if DD is not set pending work has not been completed */
+ if (!(eop_desc->wb.status & cpu_to_le32(TXGBE_TXD_STAT_DD)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buffer->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buffer->bytecount;
+
+ /* free the skb */
+ dev_consume_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buffer data */
+ tx_buffer->skb = NULL;
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = TXGBE_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = TXGBE_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.packets += total_packets;
+ u64_stats_update_end(&tx_ring->syncp);
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+ (txgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+
+ if (__netif_subqueue_stopped(tx_ring->netdev,
+ tx_ring->queue_index) &&
+ !test_bit(__TXGBE_DOWN, &adapter->state)) {
+ netif_wake_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
+ ++tx_ring->tx_stats.restart_queue;
+ }
+ }
+
+ return !!budget;
+}
+
+static bool txgbe_alloc_mapped_page(struct txgbe_ring *rx_ring,
+ struct txgbe_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = dev_alloc_pages(txgbe_rx_pg_order(rx_ring));
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_rx_page_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0,
+ txgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, txgbe_rx_pg_order(rx_ring));
+
+ rx_ring->rx_stats.alloc_rx_page_failed++;
+ return false;
+ }
+
+ bi->page_dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
+
+ return true;
+}
+
+/**
+ * txgbe_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void txgbe_alloc_rx_buffers(struct txgbe_ring *rx_ring, u16 cleaned_count)
+{
+ union txgbe_rx_desc *rx_desc;
+ struct txgbe_rx_buffer *bi;
+ u16 i = rx_ring->next_to_use;
+
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
+ rx_desc = TXGBE_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
+ i -= rx_ring->count;
+
+ do {
+ if (!txgbe_alloc_mapped_page(rx_ring, bi))
+ break;
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(bi->page_dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ i++;
+ if (unlikely(!i)) {
+ rx_desc = TXGBE_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buffer_info;
+ i -= rx_ring->count;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.upper.status_error = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ i += rx_ring->count;
+
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, rx_ring->tail);
+ }
+}
+
+/**
+ * txgbe_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate fields within the skb.
+ **/
+static void txgbe_process_skb_fields(struct txgbe_ring *rx_ring,
+ union txgbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static void txgbe_rx_skb(struct txgbe_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ napi_gro_receive(&q_vector->napi, skb);
+}
+
+/**
+ * txgbe_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool txgbe_is_non_eop(struct txgbe_ring *rx_ring,
+ union txgbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(TXGBE_RX_DESC(rx_ring, ntc));
+
+ /* if we are the last buffer then there is nothing else to do */
+ if (likely(txgbe_test_staterr(rx_desc, TXGBE_RXD_STAT_EOP)))
+ return false;
+
+ rx_ring->rx_buffer_info[ntc].skb = skb;
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
+}
+
+static void txgbe_pull_tail(struct sk_buff *skb)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int pull_len;
+ unsigned char *va;
+
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(skb->dev, va, TXGBE_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ skb_frag_off_add(frag, pull_len);
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/**
+ * txgbe_dma_sync_frag - perform DMA sync for first frag of SKB
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being updated
+ *
+ * This function provides a basic DMA sync up for the first fragment of an
+ * skb. The reason for doing this is that the first fragment cannot be
+ * unmapped until we have reached the end of packet descriptor for a buffer
+ * chain.
+ */
+static void txgbe_dma_sync_frag(struct txgbe_ring *rx_ring,
+ struct sk_buff *skb)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ TXGBE_CB(skb)->dma,
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ DMA_FROM_DEVICE);
+
+ /* If the page was released, just unmap it. */
+ if (unlikely(TXGBE_CB(skb)->page_released)) {
+ dma_unmap_page_attrs(rx_ring->dev, TXGBE_CB(skb)->dma,
+ txgbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ TXGBE_RX_DMA_ATTR);
+ }
+}
+
+/**
+ * txgbe_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right. These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool txgbe_cleanup_headers(struct txgbe_ring *rx_ring,
+ union txgbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct net_device *netdev = rx_ring->netdev;
+
+ /* verify that the packet does not have any known errors */
+ if (unlikely(txgbe_test_staterr(rx_desc,
+ TXGBE_RXD_ERR_FRAME_ERR_MASK) &&
+ !(netdev->features & NETIF_F_RXALL))) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb) && !skb_headlen(skb))
+ txgbe_pull_tail(skb);
+
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
+
+ return false;
+}
+
+/**
+ * txgbe_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void txgbe_reuse_rx_page(struct txgbe_ring *rx_ring,
+ struct txgbe_rx_buffer *old_buff)
+{
+ struct txgbe_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->page_dma = old_buff->page_dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, new_buff->page_dma,
+ new_buff->page_offset,
+ txgbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+}
+
+static inline bool txgbe_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * txgbe_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool txgbe_add_rx_frag(struct txgbe_ring *rx_ring,
+ struct txgbe_rx_buffer *rx_buffer,
+ union txgbe_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = txgbe_rx_bufsz(rx_ring);
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int last_offset = txgbe_rx_pg_size(rx_ring) -
+ txgbe_rx_bufsz(rx_ring);
+#endif
+
+ if (size <= TXGBE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* page is not reserved, we can reuse buffer as-is */
+ if (likely(!txgbe_page_is_reserved(page)))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ __free_pages(page, txgbe_rx_pg_order(rx_ring));
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(txgbe_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > last_offset)
+ return false;
+#endif
+
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ page_ref_inc(page);
+
+ return true;
+}
+
+static struct sk_buff *txgbe_fetch_rx_buffer(struct txgbe_ring *rx_ring,
+ union txgbe_rx_desc *rx_desc)
+{
+ struct txgbe_rx_buffer *rx_buffer;
+ struct sk_buff *skb;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ skb = rx_buffer->skb;
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) +
+ rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ TXGBE_RX_HDR_SIZE);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ return NULL;
+ }
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+
+ /* Delay unmapping of the first packet. It carries the
+ * header information, HW may still access the header
+ * after the writeback. Only unmap it when EOP is
+ * reached
+ */
+ if (likely(txgbe_test_staterr(rx_desc, TXGBE_RXD_STAT_EOP)))
+ goto dma_sync;
+
+ TXGBE_CB(skb)->dma = rx_buffer->page_dma;
+ } else {
+ if (txgbe_test_staterr(rx_desc, TXGBE_RXD_STAT_EOP))
+ txgbe_dma_sync_frag(rx_ring, skb);
+
+dma_sync:
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->page_dma,
+ rx_buffer->page_offset,
+ txgbe_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+
+ rx_buffer->skb = NULL;
+ }
+
+ /* pull page into skb */
+ if (txgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ txgbe_reuse_rx_page(rx_ring, rx_buffer);
+ } else if (TXGBE_CB(skb)->dma == rx_buffer->page_dma) {
+ /* the page has been released from the ring */
+ TXGBE_CB(skb)->page_released = true;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->page_dma,
+ txgbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+/**
+ * txgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @q_vector: structure containing interrupt and ring information
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed.
+ **/
+static int txgbe_clean_rx_irq(struct txgbe_q_vector *q_vector,
+ struct txgbe_ring *rx_ring,
+ int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = txgbe_desc_unused(rx_ring);
+
+ do {
+ union txgbe_rx_desc *rx_desc;
+ struct sk_buff *skb;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= TXGBE_RX_BUFFER_WRITE) {
+ txgbe_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = TXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ if (!txgbe_test_staterr(rx_desc, TXGBE_RXD_STAT_DD))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ /* retrieve a buffer from the ring */
+ skb = txgbe_fetch_rx_buffer(rx_ring, rx_desc);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
+
+ cleaned_count++;
+
+ /* place incomplete frames back on ring for completion */
+ if (txgbe_is_non_eop(rx_ring, rx_desc, skb))
+ continue;
+
+ /* verify the packet layout is correct */
+ if (txgbe_cleanup_headers(rx_ring, rx_desc, skb))
+ continue;
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ txgbe_process_skb_fields(rx_ring, rx_desc, skb);
+
+ txgbe_rx_skb(q_vector, skb);
+
+ /* update budget accounting */
+ total_rx_packets++;
+ } while (likely(total_rx_packets < budget));
+
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ q_vector->rx.total_packets += total_rx_packets;
+ q_vector->rx.total_bytes += total_rx_bytes;
+
+ return total_rx_packets;
+}
+
/**
* txgbe_configure_msix - Configure MSI-X hardware
* @adapter: board private structure
@@ -239,6 +886,74 @@ static void txgbe_configure_msix(struct txgbe_adapter *adapter)
wr32(&adapter->hw, TXGBE_PX_ITR(v_idx), 1950);
}
+enum latency_range {
+ lowest_latency = 0,
+ low_latency = 1,
+ bulk_latency = 2,
+ latency_invalid = 255
+};
+
+/**
+ * txgbe_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @ring_container: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ **/
+static void txgbe_update_itr(struct txgbe_q_vector *q_vector,
+ struct txgbe_ring_container *ring_container)
+{
+ int packets = ring_container->total_packets;
+ int bytes = ring_container->total_bytes;
+ u8 itr_setting = ring_container->itr;
+ u32 timepassed_us;
+ u64 bytes_perint;
+
+ if (packets == 0)
+ return;
+
+ /* simple throttlerate management
+ * 0-10MB/s lowest (100000 ints/s)
+ * 10-20MB/s low (20000 ints/s)
+ * 20-1249MB/s bulk (12000 ints/s)
+ */
+ /* what was last interrupt timeslice? */
+ timepassed_us = q_vector->itr >> 2;
+ if (timepassed_us == 0)
+ return;
+ bytes_perint = bytes / timepassed_us; /* bytes/usec */
+
+ switch (itr_setting) {
+ case lowest_latency:
+ if (bytes_perint > 10)
+ itr_setting = low_latency;
+ break;
+ case low_latency:
+ if (bytes_perint > 20)
+ itr_setting = bulk_latency;
+ else if (bytes_perint <= 10)
+ itr_setting = lowest_latency;
+ break;
+ case bulk_latency:
+ if (bytes_perint <= 20)
+ itr_setting = low_latency;
+ break;
+ }
+
+ /* clear work counters since we have the values we need */
+ ring_container->total_bytes = 0;
+ ring_container->total_packets = 0;
+
+ /* write updated itr to ring container */
+ ring_container->itr = itr_setting;
+}
+
/**
* txgbe_write_eitr - write EITR register in hardware specific way
* @q_vector: structure containing interrupt and ring information
@@ -259,6 +974,43 @@ void txgbe_write_eitr(struct txgbe_q_vector *q_vector)
wr32(hw, TXGBE_PX_ITR(v_idx), itr_reg);
}
+static void txgbe_set_itr(struct txgbe_q_vector *q_vector)
+{
+ u16 new_itr = q_vector->itr;
+ u8 current_itr;
+
+ txgbe_update_itr(q_vector, &q_vector->tx);
+ txgbe_update_itr(q_vector, &q_vector->rx);
+
+ current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+ switch (current_itr) {
+ /* counts and packets in update_itr are dependent on these numbers */
+ case lowest_latency:
+ new_itr = TXGBE_100K_ITR;
+ break;
+ case low_latency:
+ new_itr = TXGBE_20K_ITR;
+ break;
+ case bulk_latency:
+ new_itr = TXGBE_12K_ITR;
+ break;
+ default:
+ break;
+ }
+
+ if (new_itr != q_vector->itr) {
+ /* do an exponential smoothing */
+ new_itr = (10 * new_itr * q_vector->itr) /
+ ((9 * new_itr) + q_vector->itr);
+
+ /* save the algorithm value here */
+ q_vector->itr = new_itr;
+
+ txgbe_write_eitr(q_vector);
+ }
+}
+
/**
* txgbe_check_overtemp_subtask - check for over temperature
* @adapter: pointer to adapter
@@ -473,6 +1225,50 @@ static irqreturn_t txgbe_msix_clean_rings(int __always_unused irq, void *data)
**/
int txgbe_poll(struct napi_struct *napi, int budget)
{
+ struct txgbe_q_vector *q_vector =
+ container_of(napi, struct txgbe_q_vector, napi);
+ struct txgbe_adapter *adapter = q_vector->adapter;
+ struct txgbe_ring *ring;
+ int per_ring_budget;
+ bool clean_complete = true;
+
+ txgbe_for_each_ring(ring, q_vector->tx) {
+ if (!txgbe_clean_tx_irq(q_vector, ring))
+ clean_complete = false;
+ }
+
+ /* Exit if we are called by netpoll */
+ if (budget <= 0)
+ return budget;
+
+ /* attempt to distribute budget to each queue fairly, but don't allow
+ * the budget to go below 1 because we'll exit polling
+ */
+ if (q_vector->rx.count > 1)
+ per_ring_budget = max(budget / q_vector->rx.count, 1);
+ else
+ per_ring_budget = budget;
+
+ txgbe_for_each_ring(ring, q_vector->rx) {
+ int cleaned = txgbe_clean_rx_irq(q_vector, ring,
+ per_ring_budget);
+
+ if (cleaned >= per_ring_budget)
+ clean_complete = false;
+ }
+
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* all work done, exit the polling mode */
+ napi_complete(napi);
+ if (adapter->rx_itr_setting == 1)
+ txgbe_set_itr(q_vector);
+ if (!test_bit(__TXGBE_DOWN, &adapter->state))
+ txgbe_intr_enable(&adapter->hw,
+ TXGBE_INTR_Q(q_vector->v_idx));
+
return 0;
}
@@ -933,6 +1729,7 @@ void txgbe_configure_rx_ring(struct txgbe_adapter *adapter,
TXGBE_PX_RR_CFG_RR_EN, TXGBE_PX_RR_CFG_RR_EN);
txgbe_rx_desc_queue_enable(adapter, ring);
+ txgbe_alloc_rx_buffers(ring, txgbe_desc_unused(ring));
}
static void txgbe_setup_psrtype(struct txgbe_adapter *adapter)
@@ -2760,6 +3557,8 @@ static int txgbe_probe(struct pci_dev *pdev,
u16 eeprom_verh = 0, eeprom_verl = 0, offset = 0;
u16 eeprom_cfg_blkh = 0, eeprom_cfg_blkl = 0;
u16 build = 0, major = 0, patch = 0;
+ u16 ctl = 0;
+ char *info_string, *i_s_var;
u8 part_str[TXGBE_PBANUM_LENGTH];
u32 etrack_id = 0;
@@ -2786,6 +3585,13 @@ static int txgbe_probe(struct pci_dev *pdev,
pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
+ pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl);
+ if (((ctl & PCI_EXP_DEVCTL_READRQ) != PCI_EXP_DEVCTL_READRQ_128B) &&
+ ((ctl & PCI_EXP_DEVCTL_READRQ) != PCI_EXP_DEVCTL_READRQ_256B))
+ pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_READRQ,
+ PCI_EXP_DEVCTL_READRQ_256B);
+
netdev = devm_alloc_etherdev_mqs(&pdev->dev,
sizeof(struct txgbe_adapter),
TXGBE_MAX_TX_QUEUES,
@@ -2985,6 +3791,23 @@ static int txgbe_probe(struct pci_dev *pdev,
netdev->dev_addr[2], netdev->dev_addr[3],
netdev->dev_addr[4], netdev->dev_addr[5]);
+#define INFO_STRING_LEN 255
+ info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
+ if (!info_string) {
+ netif_err(adapter, probe, netdev,
+ "allocation for info string failed\n");
+ goto no_info_string;
+ }
+ i_s_var = info_string;
+ i_s_var += sprintf(info_string, "Enabled Features: ");
+ i_s_var += sprintf(i_s_var, "RxQ: %d TxQ: %d ",
+ adapter->num_rx_queues, adapter->num_tx_queues);
+
+ WARN_ON(i_s_var > (info_string + INFO_STRING_LEN));
+ /* end features printing */
+ netif_info(adapter, probe, netdev, "%s\n", info_string);
+ kfree(info_string);
+no_info_string:
/* firmware requires blank driver version */
TCALL(hw, mac.ops.set_fw_drv_ver, 0xFF, 0xFF, 0xFF, 0xFF);
@@ -955,6 +955,19 @@ enum {
#define TXGBE_PCIDEVCTRL2_4_8s 0xd
#define TXGBE_PCIDEVCTRL2_17_34s 0xe
+/******************* Receive Descriptor bit definitions **********************/
+#define TXGBE_RXD_STAT_DD 0x00000001U /* Done */
+#define TXGBE_RXD_STAT_EOP 0x00000002U /* End of Packet */
+
+#define TXGBE_RXD_ERR_MASK 0xfff00000U /* RDESC.ERRORS mask */
+#define TXGBE_RXD_ERR_RXE 0x20000000U /* Any MAC Error */
+
+/* Masks to determine if packets should be dropped due to frame errors */
+#define TXGBE_RXD_ERR_FRAME_ERR_MASK TXGBE_RXD_ERR_RXE
+
+/*********************** Transmit Descriptor Config Masks ****************/
+#define TXGBE_TXD_STAT_DD 0x00000001U /* Descriptor Done */
+
/* Transmit Descriptor */
union txgbe_tx_desc {
struct {
Support to clean all queues associated with a q_vector, in NAPI polling. Add to simple receive packets, without hardware features. Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com> --- drivers/net/ethernet/wangxun/txgbe/txgbe.h | 63 ++ .../net/ethernet/wangxun/txgbe/txgbe_main.c | 823 ++++++++++++++++++ .../net/ethernet/wangxun/txgbe/txgbe_type.h | 13 + 3 files changed, 899 insertions(+)