From patchwork Thu Jun 30 08:41:08 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Asias He X-Patchwork-Id: 932332 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.4) with ESMTP id p5U8jLl1014426 for ; Thu, 30 Jun 2011 08:45:21 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758903Ab1F3IpR (ORCPT ); Thu, 30 Jun 2011 04:45:17 -0400 Received: from mail-iy0-f174.google.com ([209.85.210.174]:44108 "EHLO mail-iy0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758793Ab1F3IpP (ORCPT ); Thu, 30 Jun 2011 04:45:15 -0400 Received: by mail-iy0-f174.google.com with SMTP id 12so1722680iyb.19 for ; Thu, 30 Jun 2011 01:45:15 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; bh=KiQpNhz+Xd+AIcNObCsZNlThq4i1P0oJOzoO19AAjpM=; b=EjFPLrE7MilwEHy9f2leO3KuHsDHn+hxEPJ7p0HabkvrgAiUzJsxL5AiUwokg/cBcq v4LVLbg2ubdJ19v994H6nWGOaDNvnzpXO4s7235ze+gx7cqoYi/mbq3TKysTcbRoMzSJ T9IPdoKu4JsmpxixZ02OUmz4aIL1ax4zDGYv8= Received: by 10.42.174.193 with SMTP id w1mr1658864icz.454.1309423515421; Thu, 30 Jun 2011 01:45:15 -0700 (PDT) Received: from localhost.localdomain ([219.224.169.130]) by mx.google.com with ESMTPS id d6sm1967338icx.1.2011.06.30.01.45.11 (version=TLSv1/SSLv3 cipher=OTHER); Thu, 30 Jun 2011 01:45:14 -0700 (PDT) From: Asias He To: Pekka Enberg Cc: Cyrill Gorcunov , Ingo Molnar , Sasha Levin , Prasad Joshi , kvm@vger.kernel.org, Asias He Subject: [PATCH v2 20/31] kvm tools: Add TCP support for uip Date: Thu, 30 Jun 2011 16:41:08 +0800 Message-Id: <1309423279-3093-21-git-send-email-asias.hejun@gmail.com> X-Mailer: git-send-email 1.7.5.4 In-Reply-To: <1309423279-3093-1-git-send-email-asias.hejun@gmail.com> References: <1309423279-3093-1-git-send-email-asias.hejun@gmail.com> Sender: 
kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Thu, 30 Jun 2011 08:45:21 +0000 (UTC) - Implement uip_tx_do_ipv4_tcp() to send TCP packets to remote host. - Implement uip_tcp_socket_thread() to receive TCP packets from remote host. Signed-off-by: Asias He --- tools/kvm/Makefile | 1 + tools/kvm/include/kvm/uip.h | 4 + tools/kvm/uip/ipv4.c | 3 + tools/kvm/uip/tcp.c | 317 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 325 insertions(+), 0 deletions(-) create mode 100644 tools/kvm/uip/tcp.c diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index f92cfdb..c088718 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -48,6 +48,7 @@ OBJS += irq.o OBJS += uip/arp.o OBJS += uip/icmp.o OBJS += uip/ipv4.o +OBJS += uip/tcp.o OBJS += uip/udp.o OBJS += uip/buf.o OBJS += uip/csum.o diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h index 37c87c6..893c5f8 100644 --- a/tools/kvm/include/kvm/uip.h +++ b/tools/kvm/include/kvm/uip.h @@ -17,7 +17,10 @@ #define UIP_IP_HDR_LEN 0X05 #define UIP_IP_TTL 0X40 #define UIP_IP_P_UDP 0X11 +#define UIP_IP_P_TCP 0X06 +#define UIP_TCP_HDR_LEN 0x50 +#define UIP_TCP_WIN_SIZE 14600 #define UIP_TCP_FLAG_FIN 1 #define UIP_TCP_FLAG_SYN 2 #define UIP_TCP_FLAG_RST 4 @@ -265,6 +268,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth) } int uip_tx_do_ipv4_icmp(struct uip_tx_arg *arg); +int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg); int uip_tx_do_ipv4_udp(struct uip_tx_arg *arg); int uip_tx_do_ipv4(struct uip_tx_arg *arg); int uip_tx_do_arp(struct uip_tx_arg *arg); diff --git a/tools/kvm/uip/ipv4.c b/tools/kvm/uip/ipv4.c index 75058cd..4def129 100644 --- a/tools/kvm/uip/ipv4.c +++ b/tools/kvm/uip/ipv4.c @@ -15,6 +15,9 @@ int uip_tx_do_ipv4(struct uip_tx_arg *arg) case 0x01: /* ICMP */ uip_tx_do_ipv4_icmp(arg); break; + case 0x06: /* TCP */ + 
uip_tx_do_ipv4_tcp(arg); + break; case 0x11: /* UDP */ uip_tx_do_ipv4_udp(arg); break; diff --git a/tools/kvm/uip/tcp.c b/tools/kvm/uip/tcp.c new file mode 100644 index 0000000..586a45c --- /dev/null +++ b/tools/kvm/uip/tcp.c @@ -0,0 +1,317 @@ +#include "kvm/uip.h" + +#include +#include +#include + +static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how) +{ + shutdown(sk->fd, how); + + if (sk->write_done && sk->read_done) { + shutdown(sk->fd, SHUT_RDWR); + close(sk->fd); + + mutex_lock(sk->lock); + list_del(&sk->list); + mutex_unlock(sk->lock); + + free(sk); + } + + return 0; +} + +static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) +{ + struct list_head *sk_head; + pthread_mutex_t *sk_lock; + struct uip_tcp_socket *sk; + + sk_head = &arg->info->tcp_socket_head; + sk_lock = &arg->info->tcp_socket_lock; + + mutex_lock(sk_lock); + list_for_each_entry(sk, sk_head, list) { + if (sk->sip == sip && sk->dip == dip && sk->sport == sport && sk->dport == dport) { + mutex_unlock(sk_lock); + return sk; + } + } + mutex_unlock(sk_lock); + + return NULL; +} + +static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport) +{ + struct list_head *sk_head; + struct uip_tcp_socket *sk; + pthread_mutex_t *sk_lock; + struct uip_tcp *tcp; + struct uip_ip *ip; + int ret; + + tcp = (struct uip_tcp *)arg->eth; + ip = (struct uip_ip *)arg->eth; + + sk_head = &arg->info->tcp_socket_head; + sk_lock = &arg->info->tcp_socket_lock; + + sk = malloc(sizeof(*sk)); + memset(sk, 0, sizeof(*sk)); + + sk->lock = sk_lock; + sk->info = arg->info; + + sk->fd = socket(AF_INET, SOCK_STREAM, 0); + sk->addr.sin_family = AF_INET; + sk->addr.sin_addr.s_addr = dip; + sk->addr.sin_port = dport; + + ret = connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr)); + if (ret) { + free(sk); + return NULL; + } + + sk->sip = ip->sip; + sk->dip = ip->dip; + sk->sport = tcp->sport; + 
sk->dport = tcp->dport; + + mutex_lock(sk_lock); + list_add_tail(&sk->list, sk_head); + mutex_unlock(sk_lock); + + return sk; +} + +static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len) +{ + struct uip_info *info; + struct uip_eth *eth2; + struct uip_tcp *tcp2; + struct uip_buf *buf; + struct uip_ip *ip2; + + info = sk->info; + + /* + * Get free buffer to send data to guest + */ + buf = uip_buf_get_free(info); + + /* + * Cook an ethernet frame + */ + tcp2 = (struct uip_tcp *)buf->eth; + eth2 = (struct uip_eth *)buf->eth; + ip2 = (struct uip_ip *)buf->eth; + + eth2->src = info->host_mac; + eth2->dst = info->guest_mac; + eth2->type = htons(UIP_ETH_P_IP); + + ip2->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN; + ip2->tos = 0; + ip2->id = 0; + ip2->flgfrag = 0; + ip2->ttl = UIP_IP_TTL; + ip2->proto = UIP_IP_P_TCP; + ip2->csum = 0; + ip2->sip = sk->dip; + ip2->dip = sk->sip; + + tcp2->sport = sk->dport; + tcp2->dport = sk->sport; + tcp2->seq = htonl(sk->seq_server); + tcp2->ack = htonl(sk->ack_server); + /* + * Disable TCP options, tcp hdr len equals 20 bytes + */ + tcp2->off = UIP_TCP_HDR_LEN; + tcp2->flg = flag; + tcp2->win = htons(UIP_TCP_WIN_SIZE); + tcp2->csum = 0; + tcp2->urgent = 0; + + if (payload_len > 0) + memcpy(uip_tcp_payload(tcp2), sk->payload, payload_len); + + ip2->len = htons(uip_tcp_hdrlen(tcp2) + payload_len + uip_ip_hdrlen(ip2)); + ip2->csum = uip_csum_ip(ip2); + tcp2->csum = uip_csum_tcp(tcp2); + + /* + * virtio_net_hdr + */ + buf->vnet_len = sizeof(struct virtio_net_hdr); + memset(buf->vnet, 0, buf->vnet_len); + + buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); + + /* + * Increase server seq + */ + sk->seq_server += payload_len; + + /* + * Send data received from socket to guest + */ + uip_buf_set_used(info, buf); + + return 0; +} + +static void *uip_tcp_socket_thread(void *p) +{ + struct uip_tcp_socket *sk; + u8 *payload; + int ret; + + sk = p; + + payload = malloc(UIP_MAX_TCP_PAYLOAD); + sk->payload = payload; + if 
(!sk->payload) + goto out; + + while (1) { + + ret = read(sk->fd, payload, UIP_MAX_TCP_PAYLOAD); + + if (ret <= 0 || ret > UIP_MAX_TCP_PAYLOAD) + goto out; + + uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, ret); + + } + +out: + /* + * Close server to guest TCP connection + */ + uip_tcp_socket_close(sk, SHUT_RD); + + uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0); + sk->seq_server += 1; + + sk->read_done = 1; + + free(sk->payload); + pthread_exit(NULL); + + return NULL; +} + +static int uip_tcp_socket_receive(struct uip_tcp_socket *sk) +{ + if (sk->thread == 0) + return pthread_create(&sk->thread, NULL, uip_tcp_socket_thread, (void *)sk); + + return 0; +} + +static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp) +{ + int len; + int ret; + u8 *payload; + + if (sk->write_done) + return 0; + + payload = uip_tcp_payload(tcp); + len = uip_tcp_payloadlen(tcp); + + ret = write(sk->fd, payload, len); + if (ret != len) + pr_warning("tcp send error"); + + return ret; +} + +int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg) +{ + struct uip_tcp_socket *sk; + struct uip_tcp *tcp; + struct uip_ip *ip; + int ret; + + tcp = (struct uip_tcp *)arg->eth; + ip = (struct uip_ip *)arg->eth; + + /* + * Guest is trying to start a TCP session, let's fake SYN-ACK to guest + */ + if (uip_tcp_is_syn(tcp)) { + sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); + if (!sk) + return -1; + + /* + * Setup ISN number + */ + sk->isn_guest = uip_tcp_isn(tcp); + sk->isn_server = uip_tcp_isn_alloc(); + + sk->seq_server = sk->isn_server; + sk->ack_server = sk->isn_guest + 1; + uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0); + sk->seq_server += 1; + + /* + * Start receive thread for data from remote to guest + */ + uip_tcp_socket_receive(sk); + + goto out; + } + + /* + * Find socket we have allocated + */ + sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport); + if (!sk) + return -1; + + sk->guest_acked = 
ntohl(tcp->ack); + + if (uip_tcp_is_fin(tcp)) { + if (sk->write_done) + goto out; + + sk->write_done = 1; + sk->ack_server += 1; + uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); + + /* + * Close guest to server TCP connection + */ + uip_tcp_socket_close(sk, SHUT_WR); + + goto out; + } + + /* + * Ignore guest to server frames with zero tcp payload + */ + if (uip_tcp_payloadlen(tcp) == 0) + goto out; + + /* + * Send out TCP data to remote host + */ + ret = uip_tcp_socket_send(sk, tcp); + if (ret < 0) + return -1; + /* + * Send ACK to guest immediately + */ + sk->ack_server += ret; + uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0); + +out: + return 0; +}