diff mbox

[1/2] kvm tools: Respect guest tcp window size

Message ID 1345685212-8081-1-git-send-email-asias.hejun@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Asias He Aug. 23, 2012, 1:26 a.m. UTC
Respect guest tcp window size and stop sending tcp segments to guest
if guest's receive window is closed.

This fixes the TCP hang I'm seeing where guest and host are transferring
big chuck of data.

This problem was not triggered when guest and external host
communicates, probably because guest to external host communication
walks through real network and is much slower than guest and host
communication. Thus, guest's receive window has little chance to be
closed.

Signed-off-by: Asias He <asias.hejun@gmail.com>
---
 tools/kvm/include/kvm/uip.h |  1 +
 tools/kvm/net/uip/tcp.c     | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

Comments

Pekka Enberg Aug. 23, 2012, 9:31 a.m. UTC | #1
On Thu, Aug 23, 2012 at 4:26 AM, Asias He <asias.hejun@gmail.com> wrote:
> Respect guest tcp window size and stop sending tcp segments to guest
> if guest's receive window is closed.
>
> This fixes the TCP hang I'm seeing where guest and host are transferring
> big chuck of data.

Awesome!

> This problem was not triggered when guest and external host
> communicates, probably because guest to external host communication
> walks through real network and is much slower than guest and host
> communication. Thus, guest's receive window has little chance to be
> closed.
>
> Signed-off-by: Asias He <asias.hejun@gmail.com>
> ---
>  tools/kvm/include/kvm/uip.h |  1 +
>  tools/kvm/net/uip/tcp.c     | 28 ++++++++++++++++++++++------
>  2 files changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
> index 4497f6a..191029b 100644
> --- a/tools/kvm/include/kvm/uip.h
> +++ b/tools/kvm/include/kvm/uip.h
> @@ -235,6 +235,7 @@ struct uip_tcp_socket {
>         pthread_t thread;
>         u32 dport, sport;
>         u32 guest_acked;
> +       u16 window_size;
>         /*
>          * Initial Sequence Number
>          */
> diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
> index 586a45c..cf46bac 100644
> --- a/tools/kvm/net/uip/tcp.c
> +++ b/tools/kvm/net/uip/tcp.c
> @@ -167,25 +167,38 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
>  static void *uip_tcp_socket_thread(void *p)
>  {
>         struct uip_tcp_socket *sk;
> -       u8 *payload;
> -       int ret;
> +       u8 *payload, *pos;
> +       int len, left, ret;
>
>         sk = p;
>
>         payload = malloc(UIP_MAX_TCP_PAYLOAD);
> -       sk->payload = payload;
> -       if (!sk->payload)
> +       if (!payload)
>                 goto out;
>
>         while (1) {
> +               pos = payload;
>
>                 ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD);
>
>                 if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
>                         goto out;
>
> -               uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret);
> +               left = ret;
> +
> +               while (left > 0) {
> +                       while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
> +                               usleep(100);

So what exactly is this piece of code trying to accomplish? Surely
there's some more reasonable way to handle it?

> +                       sk->payload = pos;
> +                       if (len > left)
> +                               len = left;
> +                       if (len > UIP_MAX_TCP_PAYLOAD)
> +                               len = UIP_MAX_TCP_PAYLOAD;
> +                       left -= len;
>
> +                       pos += len;
> +                       uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
> +               }
>         }
>
>  out:
> @@ -199,7 +212,7 @@ out:
>
>         sk->read_done = 1;
>
> -       free(sk->payload);
> +       free(payload);
>         pthread_exit(NULL);
>
>         return NULL;
> @@ -250,6 +263,8 @@ int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
>                 if (!sk)
>                         return -1;
>
> +               sk->window_size = ntohs(tcp->win);
> +
>                 /*
>                  * Setup ISN number
>                  */
> @@ -276,6 +291,7 @@ int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
>         if (!sk)
>                 return -1;
>
> +       sk->window_size = ntohs(tcp->win);
>         sk->guest_acked = ntohl(tcp->ack);
>
>         if (uip_tcp_is_fin(tcp)) {
> --
> 1.7.11.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Asias He Aug. 23, 2012, 9:36 a.m. UTC | #2
On Thu, Aug 23, 2012 at 5:31 PM, Pekka Enberg <penberg@kernel.org> wrote:
> On Thu, Aug 23, 2012 at 4:26 AM, Asias He <asias.hejun@gmail.com> wrote:
>> Respect guest tcp window size and stop sending tcp segments to guest
>> if guest's receive window is closed.
>>
>> This fixes the TCP hang I'm seeing where guest and host are transferring
>> big chuck of data.
>
> Awesome!
>
>> This problem was not triggered when guest and external host
>> communicates, probably because guest to external host communication
>> walks through real network and is much slower than guest and host
>> communication. Thus, guest's receive window has little chance to be
>> closed.
>>
>> Signed-off-by: Asias He <asias.hejun@gmail.com>
>> ---
>>  tools/kvm/include/kvm/uip.h |  1 +
>>  tools/kvm/net/uip/tcp.c     | 28 ++++++++++++++++++++++------
>>  2 files changed, 23 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
>> index 4497f6a..191029b 100644
>> --- a/tools/kvm/include/kvm/uip.h
>> +++ b/tools/kvm/include/kvm/uip.h
>> @@ -235,6 +235,7 @@ struct uip_tcp_socket {
>>         pthread_t thread;
>>         u32 dport, sport;
>>         u32 guest_acked;
>> +       u16 window_size;
>>         /*
>>          * Initial Sequence Number
>>          */
>> diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
>> index 586a45c..cf46bac 100644
>> --- a/tools/kvm/net/uip/tcp.c
>> +++ b/tools/kvm/net/uip/tcp.c
>> @@ -167,25 +167,38 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
>>  static void *uip_tcp_socket_thread(void *p)
>>  {
>>         struct uip_tcp_socket *sk;
>> -       u8 *payload;
>> -       int ret;
>> +       u8 *payload, *pos;
>> +       int len, left, ret;
>>
>>         sk = p;
>>
>>         payload = malloc(UIP_MAX_TCP_PAYLOAD);
>> -       sk->payload = payload;
>> -       if (!sk->payload)
>> +       if (!payload)
>>                 goto out;
>>
>>         while (1) {
>> +               pos = payload;
>>
>>                 ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD);
>>
>>                 if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
>>                         goto out;
>>
>> -               uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret);
>> +               left = ret;
>> +
>> +               while (left > 0) {
>> +                       while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
>> +                               usleep(100);
>
> So what exactly is this piece of code trying to accomplish? Surely
> there's some more reasonable way to handle it?

Wait until the guest is available to receive some more tcp data.

Yes, we can introduce some sync ops here to do this. But for now, I'd
prefer this simple method ;-)

>> +                       sk->payload = pos;
>> +                       if (len > left)
>> +                               len = left;
>> +                       if (len > UIP_MAX_TCP_PAYLOAD)
>> +                               len = UIP_MAX_TCP_PAYLOAD;
>> +                       left -= len;
>>
>> +                       pos += len;
>> +                       uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
>> +               }
>>         }
>>
>>  out:
>> @@ -199,7 +212,7 @@ out:
>>
>>         sk->read_done = 1;
>>
>> -       free(sk->payload);
>> +       free(payload);
>>         pthread_exit(NULL);
>>
>>         return NULL;
>> @@ -250,6 +263,8 @@ int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
>>                 if (!sk)
>>                         return -1;
>>
>> +               sk->window_size = ntohs(tcp->win);
>> +
>>                 /*
>>                  * Setup ISN number
>>                  */
>> @@ -276,6 +291,7 @@ int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
>>         if (!sk)
>>                 return -1;
>>
>> +       sk->window_size = ntohs(tcp->win);
>>         sk->guest_acked = ntohl(tcp->ack);
>>
>>         if (uip_tcp_is_fin(tcp)) {
>> --
>> 1.7.11.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pekka Enberg Aug. 23, 2012, 10:23 a.m. UTC | #3
On Thu, Aug 23, 2012 at 12:36 PM, Asias He <asias.hejun@gmail.com> wrote:
>>>         while (1) {
>>> +               pos = payload;
>>>
>>>                 ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD);
>>>
>>>                 if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
>>>                         goto out;
>>>
>>> -               uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret);
>>> +               left = ret;
>>> +
>>> +               while (left > 0) {
>>> +                       while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
>>> +                               usleep(100);
>>
>> So what exactly is this piece of code trying to accomplish? Surely
>> there's some more reasonable way to handle it?
>
> Wait until the guest is available to receive some more tcp data.
>
> Yes, we can introduce some sync ops here to do this. But for now, I'd
> prefer this simple method ;-)

It doesn't look all that simple and since there's no locking, it's not
safe. Can we use pthread condition variables here?

                        Pekka
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h
index 4497f6a..191029b 100644
--- a/tools/kvm/include/kvm/uip.h
+++ b/tools/kvm/include/kvm/uip.h
@@ -235,6 +235,7 @@  struct uip_tcp_socket {
 	pthread_t thread;
 	u32 dport, sport;
 	u32 guest_acked;
+	u16 window_size;
 	/*
 	 * Initial Sequence Number
 	 */
diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c
index 586a45c..cf46bac 100644
--- a/tools/kvm/net/uip/tcp.c
+++ b/tools/kvm/net/uip/tcp.c
@@ -167,25 +167,38 @@  static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_
 static void *uip_tcp_socket_thread(void *p)
 {
 	struct uip_tcp_socket *sk;
-	u8 *payload;
-	int ret;
+	u8 *payload, *pos;
+	int len, left, ret;
 
 	sk = p;
 
 	payload = malloc(UIP_MAX_TCP_PAYLOAD);
-	sk->payload = payload;
-	if (!sk->payload)
+	if (!payload)
 		goto out;
 
 	while (1) {
+		pos = payload;
 
 		ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD);
 
 		if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD)
 			goto out;
 
-		uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret);
+		left = ret;
+
+		while (left > 0) {
+			while ((len = sk->guest_acked + sk->window_size - sk->seq_server) <= 0)
+				usleep(100);
+			sk->payload = pos;
+			if (len > left)
+				len = left;
+			if (len > UIP_MAX_TCP_PAYLOAD)
+				len = UIP_MAX_TCP_PAYLOAD;
+			left -= len;
 
+			pos += len;
+			uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, len);
+		}
 	}
 
 out:
@@ -199,7 +212,7 @@  out:
 
 	sk->read_done = 1;
 
-	free(sk->payload);
+	free(payload);
 	pthread_exit(NULL);
 
 	return NULL;
@@ -250,6 +263,8 @@  int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
 		if (!sk)
 			return -1;
 
+		sk->window_size = ntohs(tcp->win);
+
 		/*
 		 * Setup ISN number
 		 */
@@ -276,6 +291,7 @@  int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
 	if (!sk)
 		return -1;
 
+	sk->window_size = ntohs(tcp->win);
 	sk->guest_acked = ntohl(tcp->ack);
 
 	if (uip_tcp_is_fin(tcp)) {