diff mbox

[PATCHv2] vhost-net: add dhclient work-around from userspace

Message ID 20100628100807.GA30685@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michael S. Tsirkin June 28, 2010, 10:08 a.m. UTC
None
diff mbox

Patch

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index cc19595..03bba6a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -24,6 +24,10 @@ 
 #include <linux/if_tun.h>
 #include <linux/if_macvlan.h>
 
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/netdevice.h>
+
 #include <net/sock.h>
 
 #include "vhost.h"
@@ -186,6 +190,44 @@  static void handle_tx(struct vhost_net *net)
 	unuse_mm(net->dev.mm);
 }
 
+static int peek_head(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	lock_sock(sk);
+	skb = skb_peek(&sk->sk_receive_queue);
+	if (unlikely(!skb)) {
+		release_sock(sk);
+		return 0;	/* receive queue is empty */
+	}
+	/* The userspace virtio server applies the hack below and
+	 * guests rely on it, so we have to replicate it here, too: */
+	/* Use the UDP destination port (68) to detect incoming IPv4 DHCP
+	 * response packets, and fill in the checksum. */
+
+	/* The issue being solved: on Linux guests, some applications
+	 * that use recvmsg() on AF_PACKET sockets do not know how to
+	 * handle CHECKSUM_PARTIAL.
+	 * The interface that returns the relevant information was added in
+	 * commit 8dc4194474159660d7f37c495e3fc3f10d0db8cc,
+	 * and older userspace does not use it.
+	 * One important user of recvmsg() on AF_PACKET is dhclient,
+	 * so the work-around is applied to DHCP only. */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    skb_headlen(skb) >= skb_transport_offset(skb) +
+				sizeof(struct udphdr) &&
+	    udp_hdr(skb)->dest == htons(68) &&
+	    skb_network_header_len(skb) >= sizeof(struct iphdr) &&
+	    ip_hdr(skb)->protocol == IPPROTO_UDP &&
+	    skb->protocol == htons(ETH_P_IP)) {
+		skb_checksum_help(skb);
+		/* Put ip_summed back afterwards: tun passes it to user. */
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
+	release_sock(sk);
+	return 1;	/* a packet is at the head of the receive queue */
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_rx(struct vhost_net *net)
@@ -222,7 +264,7 @@  static void handle_rx(struct vhost_net *net)
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
 
-	for (;;) {
+	while (peek_head(sock->sk)) {
 		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,