diff mbox

[RFC,04/10] TCP general data coalescing. The parameter list is a little bit horrible and complicated to read; it should be optimized later.

Message ID 1453760690-21221-5-git-send-email-wexu@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Xu Jan. 25, 2016, 10:24 p.m. UTC
Signed-off-by: Wei Xu <wexu@redhat.com>
---
 hw/net/virtio-net.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7a6cd4c..d005a56 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -41,6 +41,10 @@ 
 
 #define VIRTIO_HEADER   12    /* Virtio net header size */
 #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+#define TCP_WINDOW      65535  /* seq/ack delta bound used by the RSC checks */
+
+/*
+ * Upper bound handed to the TCP coalescing helper for IPv4: 65535 (the
+ * largest value of the 16-bit length field in the header) minus
+ * sizeof(struct ip_header).
+ * NOTE(review): the helper compares this bound against the ip_len field
+ * value plus the new payload length, and ip_len is treated elsewhere in this
+ * patch as including the IP header - confirm the subtraction is intended.
+ */
+#define MAX_IP4_PAYLOAD  (65535 - sizeof(struct ip_header))
 
 #define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
 
@@ -1670,11 +1674,130 @@  out:
     return 0;
 }
 
+static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
+                                 const uint8_t *buf, struct tcp_header *n_tcp,
+                                 struct tcp_header *o_tcp)
+{
+    uint32_t nack, oack;
+    uint16_t nwin, owin;
+    /*
+     * Called by virtio_net_rsc_coalesce_tcp() when the new segment (n_tcp)
+     * carries the same sequence number as the cached segment (o_tcp).
+     * Compares ack number and window of the two headers and returns
+     * RSC_COALESCE when the new header was absorbed into the cached one
+     * (or counted as the first duplicated ack), RSC_FINAL otherwise.
+     * 'chain' and 'buf' are not used in this function.
+     *
+     * NOTE(review): htonl()/htons() are applied to values read from the
+     * headers; if those fields are in network byte order, ntohl()/ntohs()
+     * would be the conventional spelling of the same conversion.
+     */
+    nack = htonl(n_tcp->th_ack);
+    nwin = htons(n_tcp->th_win);
+    oack = htonl(o_tcp->th_ack);
+    owin = htons(o_tcp->th_win);
+    /*
+     * Unsigned 32-bit subtraction: an ack number that is behind the cached
+     * one wraps to a large value and is rejected here as well.
+     */
+    if ((nack - oack) >= TCP_WINDOW) {
+        return RSC_FINAL;
+    } else if (nack == oack) {
+        /* Same ack number: a duplicated ack or a window update */
+        if (nwin == owin) {
+            /*
+             * Duplicated ack (same ack, same window). Only the first one is
+             * absorbed, by bumping dup_ack_count from 0 to 1; the original
+             * note ties this limit of 1 to a WHQL test.
+             */
+
+            if (seg->dup_ack_count == 0) {
+                seg->dup_ack_count++;
+                return RSC_COALESCE;
+            } else {
+                /* One duplicate already counted. Original note: the spec
+                 * says this one should be sent directly. */
+                return RSC_FINAL;
+            }
+        } else {
+            /* Same ack, different window: take over the new window value */
+            o_tcp->th_win = n_tcp->th_win;
+            return RSC_COALESCE;
+        }
+    } else {
+        /* Ack advanced by less than TCP_WINDOW: take over the new ack */
+        o_tcp->th_ack = n_tcp->th_ack;
+        return RSC_COALESCE;
+    }
+}
+/*
+ * Try to merge the TCP segment at n_tcp into the cached segment 'seg'.
+ *
+ * n_tcp, n_tcp_len, n_data: TCP header, TCP header length and payload
+ *     length of the new segment.
+ * o_tcp, o_tcp_len, o_data: the same for the cached segment.
+ * p_ip_len: pointer to the length field of the cached packet (the IPv4
+ *     caller passes &o_ip->ip_len); read through htons() and rewritten
+ *     when payload is appended.
+ * max_data: the merge is refused when that length value plus n_data would
+ *     exceed this bound.
+ *
+ * Returns RSC_FINAL when the segment is not merged here, RSC_COALESCE when
+ * its payload was appended, or the result of virtio_net_rsc_handle_ack()
+ * for a segment with the same sequence number.
+ *
+ * NOTE(review): nothing here checks that seg->buf has room for n_data more
+ * bytes at offset seg->size - presumably guaranteed elsewhere; confirm.
+ */
+static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
+               const uint8_t *buf, struct tcp_header *n_tcp, uint16_t n_tcp_len,
+               uint16_t n_data, struct tcp_header *o_tcp, uint16_t o_tcp_len,
+               uint16_t o_data, uint16_t *p_ip_len, uint16_t max_data)
+{
+    void *data;
+    uint16_t o_ip_len;
+    uint32_t nseq, oseq;
+
+    o_ip_len = htons(*p_ip_len);
+    nseq = htonl(n_tcp->th_seq);
+    oseq = htonl(o_tcp->th_seq);
+
+    /* A TCP header longer than the cached one (more options): no merge */
+    if (n_tcp_len > o_tcp_len) {
+        return RSC_FINAL;
+    }
+
+    /*
+     * Out of order or retransmitted. The difference is unsigned, so it
+     * also becomes large when nseq is behind oseq.
+     */
+    if ((nseq - oseq) > TCP_WINDOW) {
+        return RSC_FINAL;
+    }
+
+    data = ((uint8_t *)n_tcp) + n_tcp_len;
+    if (nseq == oseq) {
+        if ((0 == o_data) && n_data) {
+            /* Cached segment has no payload and the new one does: append */
+            goto coalesce;
+        } else {
+            return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
+        }
+    } else if ((nseq - oseq) != o_data) {
+        /* New data does not start right after the cached payload */
+        return RSC_FINAL;
+    } else {
+coalesce:
+        if ((o_ip_len + n_data) > max_data) {
+            return RSC_FINAL;
+        }
+
+        /*
+         * In-order data. The length is updated through p_ip_len because,
+         * per the original note, the payload length is kept differently
+         * for v4 and v6.
+         */
+        *p_ip_len = htons(o_ip_len + n_data); /* grow length field by n_data */
+        o_tcp->th_offset_flags = n_tcp->th_offset_flags; /* carry flags over,
+         * presumably for PSH (original note: "Bring 'PUSH' big").
+         * NOTE(review): this also overwrites the data-offset bits, which can
+         * differ when n_tcp_len < o_tcp_len - confirm this is intended. */
+        o_tcp->th_ack = n_tcp->th_ack;
+        o_tcp->th_win = n_tcp->th_win;
+        /* Append the new payload, which starts right after the new header */
+        memmove(seg->buf + seg->size, data, n_data);
+        seg->size += n_data;
+        return RSC_COALESCE;
+    }
+}
 
 static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
                        NetRscSeg *seg, const uint8_t *buf, size_t size)
 {
-    return RSC_FINAL;
+    uint16_t o_ip_len, n_ip_len;    /* len in ip header field */
+    uint16_t n_ip_hdrlen, o_ip_hdrlen;  /* ipv4 header len */
+    uint16_t n_tcp_len, o_tcp_len;  /* tcp header len */
+    uint16_t o_data, n_data;          /* payload without virtio/eth/ip/tcp */
+    struct ip_header *n_ip, *o_ip;
+    struct tcp_header *n_tcp, *o_tcp;
+    /*
+     * IPv4 path: locate the IP and TCP headers of the incoming packet
+     * ('buf', n_ prefix) and of the cached segment ('seg->buf', o_ prefix),
+     * both at IP_OFFSET. Returns RSC_NO_MATCH unless addresses and ports are
+     * equal, otherwise the result of virtio_net_rsc_coalesce_tcp().
+     *
+     * Header lengths are computed as (low nibble of ip_ver_len) * 4 and
+     * (top nibble of th_offset_flags after htons()) * 4; the payload length
+     * is ip_len minus both header lengths.
+     *
+     * NOTE(review): 'size' is not used, and the lengths read from the
+     * packet are not checked against it in this function - confirm that
+     * validation happens before this is called.
+     */
+    n_ip = (struct ip_header *)(buf + IP_OFFSET);
+    n_ip_hdrlen = ((0xF & n_ip->ip_ver_len) << 2);
+    n_ip_len = htons(n_ip->ip_len);
+    n_tcp = (struct tcp_header *)(((uint8_t *)n_ip) + n_ip_hdrlen);
+    n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
+    n_data = n_ip_len - n_ip_hdrlen - n_tcp_len;
+    /* Same derivation for the cached segment */
+    o_ip = (struct ip_header *)(seg->buf + IP_OFFSET);
+    o_ip_hdrlen = ((0xF & o_ip->ip_ver_len) << 2);
+    o_ip_len = htons(o_ip->ip_len);
+    o_tcp = (struct tcp_header *)(((uint8_t *)o_ip) + o_ip_hdrlen);
+    o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
+    o_data = o_ip_len - o_ip_hdrlen - o_tcp_len;
+    /* XOR is non-zero when two values differ: any address or port
+     * mismatch means the packet does not belong to this cached segment */
+    if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
+        || (n_tcp->th_sport ^ o_tcp->th_sport)
+        || (n_tcp->th_dport ^ o_tcp->th_dport)) {
+        return RSC_NO_MATCH;
+    }
+    /* Same addresses and ports: hand over to the TCP helper, which
+     * updates o_ip->ip_len in place when it appends payload */
+    return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
+                                    n_tcp, n_tcp_len, n_data, o_tcp, o_tcp_len,
+                                    o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
 }
 
 static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,