| Message ID | 1454264009-24094-4-git-send-email-wexu@redhat.com (mailing list archive) |
|---|---|
| State | New, archived |
On Mon, Feb 01, 2016 at 02:13:22AM +0800, wexu@redhat.com wrote: > From: Wei Xu <wei@wei-thinkpad.nay.redhat.com> > > Upon a packet is arriving, a corresponding chain will be selected or created, > or be bypassed if it's not an IPv4 packets. > > The callback in the chain will be invoked to call the real coalescing. > > Since the coalescing is based on the TCP connection, so the packets will be > cached if there is no previous data within the same connection. > > The framework of IPv4 is also introduced. > > This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data > coalescing) > > Signed-off-by: Wei Xu <wexu@redhat.com> > --- > hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 172 insertions(+), 1 deletion(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 4e9458e..cfbac6d 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -14,10 +14,12 @@ > #include "qemu/iov.h" > #include "hw/virtio/virtio.h" > #include "net/net.h" > +#include "net/eth.h" > #include "net/checksum.h" > #include "net/tap.h" > #include "qemu/error-report.h" > #include "qemu/timer.h" > +#include "qemu/sockets.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "hw/virtio/virtio-bus.h" > @@ -37,6 +39,21 @@ > #define endof(container, field) \ > (offsetof(container, field) + sizeof(((container *)0)->field)) > > +#define VIRTIO_HEADER 12 /* Virtio net header size */ > +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) > + > +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) > + > +/* Global statistics */ > +static uint32_t rsc_chain_no_mem; > + > +/* Switcher to enable/disable rsc */ > +static bool virtio_net_rsc_bypass; > + > +/* Coalesce callback for ipv4/6 */ > +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, > + const uint8_t *buf, size_t size); > + Since there are only 2 cases, it's probably better to just open-code if (v4) -> coalesce4 else if v6 -> coalesce6 > typedef struct VirtIOFeature { > uint32_t flags; > size_t end; > @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) > return 0; > } > > -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) > +static ssize_t virtio_net_do_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > { > VirtIONet *n = qemu_get_nic_opaque(nc); > VirtIONetQueue *q = virtio_net_get_subqueue(nc); > @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) > } > } > > +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscSeg *seg; > + > + seg = g_malloc(sizeof(NetRscSeg)); > + if (!seg) { > + return 0; > + } > + > + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); > + if (!seg->buf) { > + goto out; > + } > + > + memmove(seg->buf, buf, size); > + seg->size = size; > + seg->dup_ack_count = 0; > + seg->is_coalesced = 0; > + seg->nc = nc; > + > + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > + return size; > + > +out: > + g_free(seg); > + return 0; > +} > + > + > +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, > + NetRscSeg *seg, const uint8_t *buf, size_t size) > +{ > + /* This real part of this function will be introduced in next patch, just > + * return a 'final' to feed the compilation. 
*/ > + return RSC_FINAL; > +} > + > +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) > +{ > + int ret; > + NetRscSeg *seg, *nseg; > + > + if (QTAILQ_EMPTY(&chain->buffers)) { > + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { > + return 0; > + } else { > + return size; > + } > + } > + > + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > + ret = coalesce(chain, seg, buf, size); > + if (RSC_FINAL == ret) { > + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); > + QTAILQ_REMOVE(&chain->buffers, seg, next); > + g_free(seg->buf); > + g_free(seg); > + if (ret == 0) { > + /* Send failed */ > + return 0; > + } > + > + /* Send current packet */ > + return virtio_net_do_receive(nc, buf, size); > + } else if (RSC_NO_MATCH == ret) { > + continue; > + } else { > + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ > + seg->is_coalesced = 1; > + return size; > + } > + } > + > + return virtio_net_rsc_cache_buf(chain, nc, buf, size); > +} > + > +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscChain *chain; > + > + chain = (NetRscChain *)opq; > + return virtio_net_rsc_callback(chain, nc, buf, size, > + virtio_net_rsc_try_coalesce4); > +} > + > +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, > + uint16_t proto) > +{ > + VirtIONet *n; > + NetRscChain *chain; > + NICState *nic; > + > + /* Only handle IPv4/6 */ > + if (proto != (uint16_t)ETH_P_IP) { > + return NULL; > + } > + > + nic = (NICState *)nc; > + n = container_of(&nic, VirtIONet, nic); > + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { > + if (chain->proto == proto) { > + return chain; > + } > + } > + > + chain = g_malloc(sizeof(*chain)); > + if (!chain) { > + rsc_chain_no_mem++; > + return NULL; > + } > + > + chain->proto = proto; > + chain->do_receive = virtio_net_rsc_receive4; > + > + QTAILQ_INIT(&chain->buffers); > + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); > + return chain; > +} > + > +static ssize_t virtio_net_rsc_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + uint16_t proto; > + NetRscChain *chain; > + struct eth_header *eth; > + > + if (size < IP_OFFSET) { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + eth = (struct eth_header *)(buf + VIRTIO_HEADER); > + proto = htons(eth->h_proto); > + chain = virtio_net_rsc_lookup_chain(nc, proto); > + if (!chain) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return chain->do_receive(chain, nc, buf, size); > + } > +} > + > +static ssize_t virtio_net_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + if (virtio_net_rsc_bypass) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return virtio_net_rsc_receive(nc, buf, size); > + } > +} > + > static NetClientInfo net_virtio_info = { > .type = NET_CLIENT_OPTIONS_KIND_NIC, > .size = sizeof(NICState), > -- > 2.4.0
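To make the suggestion concrete, a minimal sketch of the open-coded dispatch might look like the following. It reuses the NetRscChain/NetRscSeg types and RSC_* return codes introduced by this series; virtio_net_rsc_try_coalesce6 does not exist yet and is assumed to arrive together with the IPv6 support.

```c
/* Sketch only: open-coded protocol dispatch instead of passing a
 * VirtioNetCoalesce function pointer around.  virtio_net_rsc_try_coalesce6
 * is hypothetical here (IPv6 is not handled by this patch). */
static int32_t virtio_net_rsc_try_coalesce(NetRscChain *chain, NetRscSeg *seg,
                                           const uint8_t *buf, size_t size)
{
    if (chain->proto == ETH_P_IP) {
        return virtio_net_rsc_try_coalesce4(chain, seg, buf, size);
    } else if (chain->proto == ETH_P_IPV6) {
        return virtio_net_rsc_try_coalesce6(chain, seg, buf, size);
    }

    return RSC_NO_MATCH;    /* not a protocol we coalesce */
}
```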
On 02/01/2016 02:50 AM, Michael S. Tsirkin wrote: > On Mon, Feb 01, 2016 at 02:13:22AM +0800, wexu@redhat.com wrote: >> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com> >> >> Upon a packet is arriving, a corresponding chain will be selected or created, >> or be bypassed if it's not an IPv4 packets. >> >> The callback in the chain will be invoked to call the real coalescing. >> >> Since the coalescing is based on the TCP connection, so the packets will be >> cached if there is no previous data within the same connection. >> >> The framework of IPv4 is also introduced. >> >> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data >> coalescing) >> >> Signed-off-by: Wei Xu <wexu@redhat.com> >> --- >> hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 172 insertions(+), 1 deletion(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index 4e9458e..cfbac6d 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -14,10 +14,12 @@ >> #include "qemu/iov.h" >> #include "hw/virtio/virtio.h" >> #include "net/net.h" >> +#include "net/eth.h" >> #include "net/checksum.h" >> #include "net/tap.h" >> #include "qemu/error-report.h" >> #include "qemu/timer.h" >> +#include "qemu/sockets.h" >> #include "hw/virtio/virtio-net.h" >> #include "net/vhost_net.h" >> #include "hw/virtio/virtio-bus.h" >> @@ -37,6 +39,21 @@ >> #define endof(container, field) \ >> (offsetof(container, field) + sizeof(((container *)0)->field)) >> >> +#define VIRTIO_HEADER 12 /* Virtio net header size */ >> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) >> + >> +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) >> + >> +/* Global statistics */ >> +static uint32_t rsc_chain_no_mem; >> + >> +/* Switcher to enable/disable rsc */ >> +static bool virtio_net_rsc_bypass; >> + >> +/* Coalesce callback for ipv4/6 */ >> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, >> + const uint8_t *buf, size_t size); >> + > Since there are only 2 cases, it's probably better to just > open-code if (v4) -> coalesce4 else if v6 -> coalesce6 OK, thanks mst. 
Wei > >> typedef struct VirtIOFeature { >> uint32_t flags; >> size_t end; >> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) >> return 0; >> } >> >> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) >> +static ssize_t virtio_net_do_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> { >> VirtIONet *n = qemu_get_nic_opaque(nc); >> VirtIONetQueue *q = virtio_net_get_subqueue(nc); >> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) >> } >> } >> >> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + NetRscSeg *seg; >> + >> + seg = g_malloc(sizeof(NetRscSeg)); >> + if (!seg) { >> + return 0; >> + } >> + >> + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); >> + if (!seg->buf) { >> + goto out; >> + } >> + >> + memmove(seg->buf, buf, size); >> + seg->size = size; >> + seg->dup_ack_count = 0; >> + seg->is_coalesced = 0; >> + seg->nc = nc; >> + >> + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); >> + return size; >> + >> +out: >> + g_free(seg); >> + return 0; >> +} >> + >> + >> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, >> + NetRscSeg *seg, const uint8_t *buf, size_t size) >> +{ >> + /* This real part of this function will be introduced in next patch, just >> + * return a 'final' to feed the compilation. */ >> + return RSC_FINAL; >> +} >> + >> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, >> + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) >> +{ >> + int ret; >> + NetRscSeg *seg, *nseg; >> + >> + if (QTAILQ_EMPTY(&chain->buffers)) { >> + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { >> + return 0; >> + } else { >> + return size; >> + } >> + } >> + >> + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> + ret = coalesce(chain, seg, buf, size); >> + if (RSC_FINAL == ret) { >> + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> + QTAILQ_REMOVE(&chain->buffers, seg, next); >> + g_free(seg->buf); >> + g_free(seg); >> + if (ret == 0) { >> + /* Send failed */ >> + return 0; >> + } >> + >> + /* Send current packet */ >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (RSC_NO_MATCH == ret) { >> + continue; >> + } else { >> + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ >> + seg->is_coalesced = 1; >> + return size; >> + } >> + } >> + >> + return virtio_net_rsc_cache_buf(chain, nc, buf, size); >> +} >> + >> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, >> + const uint8_t *buf, size_t size) >> +{ >> + NetRscChain *chain; >> + >> + chain = (NetRscChain *)opq; >> + return virtio_net_rsc_callback(chain, nc, buf, size, >> + virtio_net_rsc_try_coalesce4); >> +} >> + >> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, >> + uint16_t proto) >> +{ >> + VirtIONet *n; >> + NetRscChain *chain; >> + NICState *nic; >> + >> + /* Only handle IPv4/6 */ >> + if (proto != (uint16_t)ETH_P_IP) { >> + return NULL; >> + } >> + >> + nic = (NICState *)nc; >> + n = container_of(&nic, VirtIONet, nic); >> + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { >> + if (chain->proto == proto) { >> + return chain; >> + } >> + } >> + >> + chain = g_malloc(sizeof(*chain)); >> + if (!chain) { >> + rsc_chain_no_mem++; >> + return NULL; >> + } >> + >> + chain->proto = proto; >> + chain->do_receive = virtio_net_rsc_receive4; >> + >> + QTAILQ_INIT(&chain->buffers); >> + 
QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); >> + return chain; >> +} >> + >> +static ssize_t virtio_net_rsc_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + uint16_t proto; >> + NetRscChain *chain; >> + struct eth_header *eth; >> + >> + if (size < IP_OFFSET) { >> + return virtio_net_do_receive(nc, buf, size); >> + } >> + >> + eth = (struct eth_header *)(buf + VIRTIO_HEADER); >> + proto = htons(eth->h_proto); >> + chain = virtio_net_rsc_lookup_chain(nc, proto); >> + if (!chain) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else { >> + return chain->do_receive(chain, nc, buf, size); >> + } >> +} >> + >> +static ssize_t virtio_net_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + if (virtio_net_rsc_bypass) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else { >> + return virtio_net_rsc_receive(nc, buf, size); >> + } >> +} >> + >> static NetClientInfo net_virtio_info = { >> .type = NET_CLIENT_OPTIONS_KIND_NIC, >> .size = sizeof(NICState), >> -- >> 2.4.0
On 02/01/2016 02:13 AM, wexu@redhat.com wrote: > From: Wei Xu <wei@wei-thinkpad.nay.redhat.com> > > Upon a packet is arriving, a corresponding chain will be selected or created, > or be bypassed if it's not an IPv4 packets. > > The callback in the chain will be invoked to call the real coalescing. > > Since the coalescing is based on the TCP connection, so the packets will be > cached if there is no previous data within the same connection. > > The framework of IPv4 is also introduced. > > This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data > coalescing) Then looks like the order needs to be changed? > > Signed-off-by: Wei Xu <wexu@redhat.com> > --- > hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 172 insertions(+), 1 deletion(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 4e9458e..cfbac6d 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -14,10 +14,12 @@ > #include "qemu/iov.h" > #include "hw/virtio/virtio.h" > #include "net/net.h" > +#include "net/eth.h" > #include "net/checksum.h" > #include "net/tap.h" > #include "qemu/error-report.h" > #include "qemu/timer.h" > +#include "qemu/sockets.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "hw/virtio/virtio-bus.h" > @@ -37,6 +39,21 @@ > #define endof(container, field) \ > (offsetof(container, field) + sizeof(((container *)0)->field)) > > +#define VIRTIO_HEADER 12 /* Virtio net header size */ This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off. > +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) > + > +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) > + > +/* Global statistics */ > +static uint32_t rsc_chain_no_mem; This is meaningless, see below comments. > + > +/* Switcher to enable/disable rsc */ > +static bool virtio_net_rsc_bypass; > + > +/* Coalesce callback for ipv4/6 */ > +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, > + const uint8_t *buf, size_t size); > + > typedef struct VirtIOFeature { > uint32_t flags; > size_t end; > @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) > return 0; > } > > -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) > +static ssize_t virtio_net_do_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > { > VirtIONet *n = qemu_get_nic_opaque(nc); > VirtIONetQueue *q = virtio_net_get_subqueue(nc); > @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) > } > } > > +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscSeg *seg; > + > + seg = g_malloc(sizeof(NetRscSeg)); > + if (!seg) { > + return 0; > + } g_malloc() can't fail, no need to check if it succeeded. > + > + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); > + if (!seg->buf) { > + goto out; > + } > + > + memmove(seg->buf, buf, size); > + seg->size = size; > + seg->dup_ack_count = 0; > + seg->is_coalesced = 0; > + seg->nc = nc; > + > + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > + return size; > + > +out: > + g_free(seg); > + return 0; > +} > + > + > +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, > + NetRscSeg *seg, const uint8_t *buf, size_t size) > +{ > + /* This real part of this function will be introduced in next patch, just > + * return a 'final' to feed the compilation. 
*/ > + return RSC_FINAL; > +} > + > +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) > +{ Looks like this function was called directly, so "callback" suffix is not accurate. > + int ret; > + NetRscSeg *seg, *nseg; > + > + if (QTAILQ_EMPTY(&chain->buffers)) { > + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { > + return 0; > + } else { > + return size; > + } > + } > + > + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > + ret = coalesce(chain, seg, buf, size); > + if (RSC_FINAL == ret) { Let's use "ret == RSC_FINAL" for a consistent coding style with other qemu codes. > + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); > + QTAILQ_REMOVE(&chain->buffers, seg, next); > + g_free(seg->buf); > + g_free(seg); > + if (ret == 0) { > + /* Send failed */ > + return 0; > + } > + > + /* Send current packet */ > + return virtio_net_do_receive(nc, buf, size); > + } else if (RSC_NO_MATCH == ret) { > + continue; > + } else { > + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ > + seg->is_coalesced = 1; > + return size; > + } > + } > + > + return virtio_net_rsc_cache_buf(chain, nc, buf, size); Maybe you can move the seg traversing info coalesce() function? This can greatly simplify the codes above? (E.g no need to call virtio_net_rsc_cache_buf() in two places?) > +} > + > +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscChain *chain; > + > + chain = (NetRscChain *)opq; > + return virtio_net_rsc_callback(chain, nc, buf, size, > + virtio_net_rsc_try_coalesce4); > +} > + > +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, > + uint16_t proto) > +{ > + VirtIONet *n; > + NetRscChain *chain; > + NICState *nic; > + > + /* Only handle IPv4/6 */ > + if (proto != (uint16_t)ETH_P_IP) { The comments is inconsistent with code, the code can only handle IPv4 currently. > + return NULL; > + } > + > + nic = (NICState *)nc; This cast is wrong for multiqueue backend. You can refer the exist virtio_net_receive() for how to vet VirtIONet structure. E.g: ... VirtIONet *n = qemu_get_nic_opaque(nc); ... > + n = container_of(&nic, VirtIONet, nic); > + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { > + if (chain->proto == proto) { > + return chain; > + } > + } > + > + chain = g_malloc(sizeof(*chain)); > + if (!chain) { > + rsc_chain_no_mem++; Since g_malloc() can't fail, this is meaningless. > + return NULL; > + } > + > + chain->proto = proto; > + chain->do_receive = virtio_net_rsc_receive4; > + > + QTAILQ_INIT(&chain->buffers); > + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); > + return chain; > +} Better to split the chain initialization from lookup. And we can initialize ipv4 chain during initialization. 
> + > +static ssize_t virtio_net_rsc_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + uint16_t proto; > + NetRscChain *chain; > + struct eth_header *eth; > + > + if (size < IP_OFFSET) { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + eth = (struct eth_header *)(buf + VIRTIO_HEADER); > + proto = htons(eth->h_proto); > + chain = virtio_net_rsc_lookup_chain(nc, proto); > + if (!chain) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return chain->do_receive(chain, nc, buf, size); > + } > +} > + > +static ssize_t virtio_net_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + if (virtio_net_rsc_bypass) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return virtio_net_rsc_receive(nc, buf, size); > + } > +} > + > static NetClientInfo net_virtio_info = { > .type = NET_CLIENT_OPTIONS_KIND_NIC, > .size = sizeof(NICState),
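To illustrate the review points about the NICState cast and the redundant g_malloc() checks, here is a hedged sketch of the chain lookup that fetches VirtIONet the same way the existing virtio_net_do_receive() does via qemu_get_nic_opaque(). This is only a sketch of the suggested direction, not the code the series ended up with.

```c
/* Sketch: look up (or lazily create) the per-protocol chain.  Getting the
 * device via qemu_get_nic_opaque() works for any subqueue of a multiqueue
 * backend; g_malloc() aborts on OOM, so no failure path is needed. */
static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
                                                uint16_t proto)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    NetRscChain *chain;

    /* Only IPv4 is coalesced by this patch */
    if (proto != (uint16_t)ETH_P_IP) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->proto = proto;
    chain->do_receive = virtio_net_rsc_receive4;
    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
    return chain;
}
```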
On 02/01/2016 01:55 PM, Jason Wang wrote: > > On 02/01/2016 02:13 AM, wexu@redhat.com wrote: >> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com> >> >> Upon a packet is arriving, a corresponding chain will be selected or created, >> or be bypassed if it's not an IPv4 packets. >> >> The callback in the chain will be invoked to call the real coalescing. >> >> Since the coalescing is based on the TCP connection, so the packets will be >> cached if there is no previous data within the same connection. >> >> The framework of IPv4 is also introduced. >> >> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data >> coalescing) > Then looks like the order needs to be changed? OK, as mentioned in other feedbacks, some of the patches should be merged, will adjust the patch set again, thanks. >> Signed-off-by: Wei Xu <wexu@redhat.com> >> --- >> hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 172 insertions(+), 1 deletion(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index 4e9458e..cfbac6d 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -14,10 +14,12 @@ >> #include "qemu/iov.h" >> #include "hw/virtio/virtio.h" >> #include "net/net.h" >> +#include "net/eth.h" >> #include "net/checksum.h" >> #include "net/tap.h" >> #include "qemu/error-report.h" >> #include "qemu/timer.h" >> +#include "qemu/sockets.h" >> #include "hw/virtio/virtio-net.h" >> #include "net/vhost_net.h" >> #include "hw/virtio/virtio-bus.h" >> @@ -37,6 +39,21 @@ >> #define endof(container, field) \ >> (offsetof(container, field) + sizeof(((container *)0)->field)) >> >> +#define VIRTIO_HEADER 12 /* Virtio net header size */ > This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off OK. > >> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) >> + >> +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) >> + >> +/* Global statistics */ >> +static uint32_t rsc_chain_no_mem; > This is meaningless, see below comments. Yes, should remove this. > >> + >> +/* Switcher to enable/disable rsc */ >> +static bool virtio_net_rsc_bypass; >> + >> +/* Coalesce callback for ipv4/6 */ >> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, >> + const uint8_t *buf, size_t size); >> + >> typedef struct VirtIOFeature { >> uint32_t flags; >> size_t end; >> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) >> return 0; >> } >> >> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) >> +static ssize_t virtio_net_do_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> { >> VirtIONet *n = qemu_get_nic_opaque(nc); >> VirtIONetQueue *q = virtio_net_get_subqueue(nc); >> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) >> } >> } >> >> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + NetRscSeg *seg; >> + >> + seg = g_malloc(sizeof(NetRscSeg)); >> + if (!seg) { >> + return 0; >> + } > g_malloc() can't fail, no need to check if it succeeded. OK. 
> >> + >> + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); >> + if (!seg->buf) { >> + goto out; >> + } >> + >> + memmove(seg->buf, buf, size); >> + seg->size = size; >> + seg->dup_ack_count = 0; >> + seg->is_coalesced = 0; >> + seg->nc = nc; >> + >> + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); >> + return size; >> + >> +out: >> + g_free(seg); >> + return 0; >> +} >> + >> + >> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, >> + NetRscSeg *seg, const uint8_t *buf, size_t size) >> +{ >> + /* This real part of this function will be introduced in next patch, just >> + * return a 'final' to feed the compilation. */ >> + return RSC_FINAL; >> +} >> + >> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, >> + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) >> +{ > Looks like this function was called directly, so "callback" suffix is > not accurate. OK. > >> + int ret; >> + NetRscSeg *seg, *nseg; >> + >> + if (QTAILQ_EMPTY(&chain->buffers)) { >> + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { >> + return 0; >> + } else { >> + return size; >> + } >> + } >> + >> + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> + ret = coalesce(chain, seg, buf, size); >> + if (RSC_FINAL == ret) { > Let's use "ret == RSC_FINAL" for a consistent coding style with other > qemu codes. OK. > >> + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> + QTAILQ_REMOVE(&chain->buffers, seg, next); >> + g_free(seg->buf); >> + g_free(seg); >> + if (ret == 0) { >> + /* Send failed */ >> + return 0; >> + } >> + >> + /* Send current packet */ >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (RSC_NO_MATCH == ret) { >> + continue; >> + } else { >> + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ >> + seg->is_coalesced = 1; >> + return size; >> + } >> + } >> + >> + return virtio_net_rsc_cache_buf(chain, nc, buf, size); > Maybe you can move the seg traversing info coalesce() function? This can > greatly simplify the codes above? (E.g no need to call > virtio_net_rsc_cache_buf() in two places?) Good idea. > >> +} >> + >> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, >> + const uint8_t *buf, size_t size) >> +{ >> + NetRscChain *chain; >> + >> + chain = (NetRscChain *)opq; >> + return virtio_net_rsc_callback(chain, nc, buf, size, >> + virtio_net_rsc_try_coalesce4); >> +} >> + >> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, >> + uint16_t proto) >> +{ >> + VirtIONet *n; >> + NetRscChain *chain; >> + NICState *nic; >> + >> + /* Only handle IPv4/6 */ >> + if (proto != (uint16_t)ETH_P_IP) { > The comments is inconsistent with code, the code can only handle IPv4 > currently. OK. > >> + return NULL; >> + } >> + >> + nic = (NICState *)nc; > This cast is wrong for multiqueue backend. You can refer the exist > virtio_net_receive() for how to vet VirtIONet structure. E.g: > > ... > VirtIONet *n = qemu_get_nic_opaque(nc); OK, will check it out. > ... > >> + n = container_of(&nic, VirtIONet, nic); >> + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { >> + if (chain->proto == proto) { >> + return chain; >> + } >> + } >> + >> + chain = g_malloc(sizeof(*chain)); >> + if (!chain) { >> + rsc_chain_no_mem++; > Since g_malloc() can't fail, this is meaningless. OK. 
> >> + return NULL; >> + } >> + >> + chain->proto = proto; >> + chain->do_receive = virtio_net_rsc_receive4; >> + >> + QTAILQ_INIT(&chain->buffers); >> + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); >> + return chain; >> +} > Better to split the chain initialization from lookup. And we can > initialize ipv4 chain during initialization. Since the allocation happens really seldom, is it ok to keep the mechanism to make the logic clean? > >> + >> +static ssize_t virtio_net_rsc_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + uint16_t proto; >> + NetRscChain *chain; >> + struct eth_header *eth; >> + >> + if (size < IP_OFFSET) { >> + return virtio_net_do_receive(nc, buf, size); >> + } >> + >> + eth = (struct eth_header *)(buf + VIRTIO_HEADER); >> + proto = htons(eth->h_proto); >> + chain = virtio_net_rsc_lookup_chain(nc, proto); >> + if (!chain) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else { >> + return chain->do_receive(chain, nc, buf, size); >> + } >> +} >> + >> +static ssize_t virtio_net_receive(NetClientState *nc, >> + const uint8_t *buf, size_t size) >> +{ >> + if (virtio_net_rsc_bypass) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else { >> + return virtio_net_rsc_receive(nc, buf, size); >> + } >> +} >> + >> static NetClientInfo net_virtio_info = { >> .type = NET_CLIENT_OPTIONS_KIND_NIC, >> .size = sizeof(NICState), > >
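As a side note on the VIRTIO_HEADER comment above: the guest-visible header is 12 bytes only when VIRTIO_NET_F_MRG_RXBUF is negotiated (struct virtio_net_hdr_mrg_rxbuf), and 10 bytes otherwise (struct virtio_net_hdr). A sketch of deriving the IP offset from the device state instead of a constant, assuming the guest_hdr_len field the rest of virtio-net.c tracks, could be:

```c
/* Sketch: compute the IP offset from the negotiated header length rather
 * than a fixed 12-byte constant.  n->guest_hdr_len is assumed to hold the
 * per-device virtio-net header size (10 or 12 bytes). */
static size_t virtio_net_rsc_ip_offset(const VirtIONet *n)
{
    return n->guest_hdr_len + sizeof(struct eth_header);
}
```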
On 02/01/2016 04:02 PM, Wei Xu wrote:
[...]
>>
>>> +        return NULL;
>>> +    }
>>> +
>>> +    chain->proto = proto;
>>> +    chain->do_receive = virtio_net_rsc_receive4;
>>> +
>>> +    QTAILQ_INIT(&chain->buffers);
>>> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
>>> +    return chain;
>>> +}
>> Better to split the chain initialization from lookup. And we can
>> initialize ipv4 chain during initialization.
>
> Since the allocation happens really seldom, is it ok to keep the
> mechanism to make the logic clean?

Ok for now.
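For reference, the split that was suggested (and agreed to defer) might look roughly like the sketch below: the chain is created once while the device is being set up, so the receive-path lookup becomes a pure search. The hook-up point, e.g. calling this from the device realize path, is an assumption.

```c
/* Sketch of eager per-protocol chain setup, run once at device init;
 * only the IPv4 chain exists at this point in the series. */
static void virtio_net_rsc_init_chains(VirtIONet *n)
{
    NetRscChain *chain = g_malloc(sizeof(*chain));

    chain->proto = ETH_P_IP;
    chain->do_receive = virtio_net_rsc_receive4;
    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
}
```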
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 4e9458e..cfbac6d 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -14,10 +14,12 @@ #include "qemu/iov.h" #include "hw/virtio/virtio.h" #include "net/net.h" +#include "net/eth.h" #include "net/checksum.h" #include "net/tap.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/sockets.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "hw/virtio/virtio-bus.h" @@ -37,6 +39,21 @@ #define endof(container, field) \ (offsetof(container, field) + sizeof(((container *)0)->field)) +#define VIRTIO_HEADER 12 /* Virtio net header size */ +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) + +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) + +/* Global statistics */ +static uint32_t rsc_chain_no_mem; + +/* Switcher to enable/disable rsc */ +static bool virtio_net_rsc_bypass; + +/* Coalesce callback for ipv4/6 */ +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, + const uint8_t *buf, size_t size); + typedef struct VirtIOFeature { uint32_t flags; size_t end; @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) return 0; } -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) +static ssize_t virtio_net_do_receive(NetClientState *nc, + const uint8_t *buf, size_t size) { VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) } } +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, + const uint8_t *buf, size_t size) +{ + NetRscSeg *seg; + + seg = g_malloc(sizeof(NetRscSeg)); + if (!seg) { + return 0; + } + + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); + if (!seg->buf) { + goto out; + } + + memmove(seg->buf, buf, size); + seg->size = size; + seg->dup_ack_count = 0; + seg->is_coalesced = 0; + seg->nc = nc; + + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); + return size; + +out: + g_free(seg); + return 0; +} + + +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, + NetRscSeg *seg, const uint8_t *buf, size_t size) +{ + /* This real part of this function will be introduced in next patch, just + * return a 'final' to feed the compilation. 
*/ + return RSC_FINAL; +} + +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) +{ + int ret; + NetRscSeg *seg, *nseg; + + if (QTAILQ_EMPTY(&chain->buffers)) { + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { + return 0; + } else { + return size; + } + } + + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { + ret = coalesce(chain, seg, buf, size); + if (RSC_FINAL == ret) { + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); + QTAILQ_REMOVE(&chain->buffers, seg, next); + g_free(seg->buf); + g_free(seg); + if (ret == 0) { + /* Send failed */ + return 0; + } + + /* Send current packet */ + return virtio_net_do_receive(nc, buf, size); + } else if (RSC_NO_MATCH == ret) { + continue; + } else { + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ + seg->is_coalesced = 1; + return size; + } + } + + return virtio_net_rsc_cache_buf(chain, nc, buf, size); +} + +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, + const uint8_t *buf, size_t size) +{ + NetRscChain *chain; + + chain = (NetRscChain *)opq; + return virtio_net_rsc_callback(chain, nc, buf, size, + virtio_net_rsc_try_coalesce4); +} + +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, + uint16_t proto) +{ + VirtIONet *n; + NetRscChain *chain; + NICState *nic; + + /* Only handle IPv4/6 */ + if (proto != (uint16_t)ETH_P_IP) { + return NULL; + } + + nic = (NICState *)nc; + n = container_of(&nic, VirtIONet, nic); + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { + if (chain->proto == proto) { + return chain; + } + } + + chain = g_malloc(sizeof(*chain)); + if (!chain) { + rsc_chain_no_mem++; + return NULL; + } + + chain->proto = proto; + chain->do_receive = virtio_net_rsc_receive4; + + QTAILQ_INIT(&chain->buffers); + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); + return chain; +} + +static ssize_t virtio_net_rsc_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + uint16_t proto; + NetRscChain *chain; + struct eth_header *eth; + + if (size < IP_OFFSET) { + return virtio_net_do_receive(nc, buf, size); + } + + eth = (struct eth_header *)(buf + VIRTIO_HEADER); + proto = htons(eth->h_proto); + chain = virtio_net_rsc_lookup_chain(nc, proto); + if (!chain) { + return virtio_net_do_receive(nc, buf, size); + } else { + return chain->do_receive(chain, nc, buf, size); + } +} + +static ssize_t virtio_net_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + if (virtio_net_rsc_bypass) { + return virtio_net_do_receive(nc, buf, size); + } else { + return virtio_net_rsc_receive(nc, buf, size); + } +} + static NetClientInfo net_virtio_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState),
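As a closing illustration of the simplification discussed above, moving the segment traversal into the coalescing path means virtio_net_rsc_cache_buf() only needs to be called in one place. A rough sketch under the same NetRscChain/NetRscSeg definitions and RSC_* return codes from this series:

```c
/* Sketch: walk the chain inside the coalescing routine itself.  An empty
 * chain and a "no connection matched" result both fall through to caching
 * the packet, removing the duplicated cache_buf call in the caller. */
static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size)
{
    NetRscSeg *seg, *nseg;
    int32_t ret;
    ssize_t sent;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ret = virtio_net_rsc_try_coalesce4(chain, seg, buf, size);
        if (ret == RSC_FINAL) {
            /* flush the cached segment first, then pass the new packet on */
            sent = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
            return sent ? virtio_net_do_receive(nc, buf, size) : 0;
        } else if (ret != RSC_NO_MATCH) {
            /* coalesced into this segment; IPv4 checksum is fixed up later */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* empty chain or no matching connection: cache the packet */
    return virtio_net_rsc_cache_buf(chain, nc, buf, size);
}
```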