diff mbox

[V2,2/2] kvm tools: Respect guest tcp window size

Message ID 1345726071-3517-2-git-send-email-asias.hejun@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Asias He Aug. 23, 2012, 12:47 p.m. UTC
Respect guest tcp window size and stop sending tcp segments to guest
if guest's receive window is closed.

This fixes the TCP hang I'm seeing when guest and host are transferring
big chunks of data.

This problem was not triggered when the guest communicates with an
external host, probably because that traffic goes through a real
network and is much slower than guest-to-host communication. Thus,
the guest's receive window has little chance to be closed.

v2: use pthread_cond_wait to wait

Signed-off-by: Asias He <asias.hejun@gmail.com>
---
 tools/kvm/include/kvm/uip.h |  2 ++
 tools/kvm/net/uip/tcp.c     | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)
diff mbox

Patch

diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
index 4497f6a..9af0110 100644
--- a/tools/kvm/include/kvm/uip.h
+++ b/tools/kvm/include/kvm/uip.h
@@ -231,10 +231,12 @@  struct uip_tcp_socket {
 	struct sockaddr_in addr;
 	struct list_head list;
 	struct uip_info *info;
+	pthread_cond_t	cond;
 	pthread_mutex_t *lock;
 	pthread_t thread;
 	u32 dport, sport;
 	u32 guest_acked;
+	u16 window_size;
 	/*
 	 * Initial Sequence Number
 	 */
diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
index 68a1d6e..711a716 100644
--- a/tools/kvm/net/uip/tcp.c
+++ b/tools/kvm/net/uip/tcp.c
@@ -70,6 +70,8 @@  static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 s
 	sk->addr.sin_port		= dport;
 	sk->addr.sin_addr.s_addr	= dip;
 
+	pthread_cond_init(&sk->cond, NULL);
+
 	if (ntohl(dip) == arg->info->host_ip)
 		sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1");
 
@@ -171,25 +173,41 @@  static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
 static void *uip_tcp_socket_thread(void *p)
 {
 	struct uip_tcp_socket *sk;
-	u8 *payload;
-	int ret;
+	int len, left, ret;
+	u8 *payload, *pos;
 
 	sk = p;
 
 	payload = malloc(UIP_MAX_TCP_PAYLOAD);
-	sk->payload = payload;
-	if (!sk->payload)
+	if (!payload)
 		goto out;
 
 	while (1) {
+		pos = payload;
 
 		ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD);
 
 		if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
 			goto out;
 
-		uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret);
+		left = ret;
+
+		while (left > 0) {
+			mutex_lock(sk->lock);
+			while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
+				pthread_cond_wait(&sk->cond, sk->lock);
+			mutex_unlock(sk->lock);
 
+			sk->payload = pos;
+			if (len > left)
+				len = left;
+			if (len > UIP_MAX_TCP_PAYLOAD)
+				len = UIP_MAX_TCP_PAYLOAD;
+			left -= len;
+			pos += len;
+
+			uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
+		}
 	}
 
 out:
@@ -203,7 +221,7 @@  out:
 
 	sk->read_done = 1;
 
-	free(sk->payload);
+	free(payload);
 	pthread_exit(NULL);
 
 	return NULL;
@@ -254,6 +272,8 @@  int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
 		if (!sk)
 			return -1;
 
+		sk->window_size = ntohs(tcp->win);
+
 		/*
 		 * Setup ISN number
 		 */
@@ -280,7 +300,11 @@  int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
 	if (!sk)
 		return -1;
 
+	mutex_lock(sk->lock);
+	sk->window_size = ntohs(tcp->win);
 	sk->guest_acked = ntohl(tcp->ack);
+	pthread_cond_signal(&sk->cond);
+	mutex_unlock(sk->lock);
 
 	if (uip_tcp_is_fin(tcp)) {
 		if (sk->write_done)