From patchwork Fri Mar 4 08:41:26 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Changlong Xie X-Patchwork-Id: 8500221 Return-Path: X-Original-To: patchwork-xen-devel@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 97DA99F659 for ; Fri, 4 Mar 2016 08:42:39 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 60034201F4 for ; Fri, 4 Mar 2016 08:42:38 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 25C9F201F2 for ; Fri, 4 Mar 2016 08:42:36 +0000 (UTC) Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xen.org with esmtp (Exim 4.84) (envelope-from ) id 1ablHU-0007Jx-Q5; Fri, 04 Mar 2016 08:40:16 +0000 Received: from mail6.bemta3.messagelabs.com ([195.245.230.39]) by lists.xen.org with esmtp (Exim 4.84) (envelope-from ) id 1ablHS-00077j-Ru for xen-devel@lists.xen.org; Fri, 04 Mar 2016 08:40:14 +0000 Received: from [85.158.137.68] by server-5.bemta-3.messagelabs.com id 78/3D-03651-EE949D65; Fri, 04 Mar 2016 08:40:14 +0000 X-Env-Sender: xiecl.fnst@cn.fujitsu.com X-Msg-Ref: server-9.tower-31.messagelabs.com!1457080803!4530056!5 X-Originating-IP: [59.151.112.132] X-SpamReason: No, hits=0.0 required=7.0 tests= X-StarScan-Received: X-StarScan-Version: 8.11; banners=-,-,- X-VirusChecked: Checked Received: (qmail 54952 invoked from network); 4 Mar 2016 08:40:13 -0000 Received: from cn.fujitsu.com (HELO heian.cn.fujitsu.com) (59.151.112.132) by server-9.tower-31.messagelabs.com with SMTP; 4 Mar 2016 08:40:13 -0000 X-IronPort-AV: E=Sophos;i="5.22,518,1449504000"; d="scan'208";a="4230670" Received: from 
unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 04 Mar 2016 16:40:09 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id F2C8542B4AFB; Fri, 4 Mar 2016 16:40:04 +0800 (CST) Received: from changlox.g08.fujitsu.local (10.167.225.55) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.181.6; Fri, 4 Mar 2016 16:40:04 +0800 From: Changlong Xie To: xen devel , Konrad Rzeszutek Wilk , Andrew Cooper , Ian Campbell , Ian Jackson , Wei Liu Date: Fri, 4 Mar 2016 16:41:26 +0800 Message-ID: <1457080891-26054-23-git-send-email-xiecl.fnst@cn.fujitsu.com> X-Mailer: git-send-email 1.9.3 In-Reply-To: <1457080891-26054-1-git-send-email-xiecl.fnst@cn.fujitsu.com> References: <1457080891-26054-1-git-send-email-xiecl.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.55] X-yoursite-MailScanner-ID: F2C8542B4AFB.A3099 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: xiecl.fnst@cn.fujitsu.com X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 Cc: Lars Kurth , Changlong Xie , Wen Congyang , Gui Jianfeng , Jiang Yunhong , Dong Eddie , Anthony Perard , Shriram Rajagopalan , Yang Hongyang Subject: [Xen-devel] [PATCH v11 22/27] COLO proxy: implement setup/teardown of COLO proxy module X-BeenThere: xen-devel@lists.xen.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Errors-To: xen-devel-bounces@lists.xen.org Sender: "Xen-devel" X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Wen Congyang setup/teardown of COLO proxy module. we use netlink to communicate with proxy module. 
About colo-proxy module: https://lkml.org/lkml/2015/6/18/32 How to use: http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping Signed-off-by: Yang Hongyang Signed-off-by: Wen Congyang Signed-off-by: Changlong Xie --- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo.h | 27 +++++ tools/libxl/libxl_colo_proxy.c | 218 +++++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_internal.h | 3 + 4 files changed, 249 insertions(+) create mode 100644 tools/libxl/libxl_colo_proxy.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 28d54d0..6fea9e0 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -67,6 +67,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o LIBXL_OBJS-y += libxl_colo_qdisk.o +LIBXL_OBJS-y += libxl_colo_proxy.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h index 90345f4..9e7f99c 100644 --- a/tools/libxl/libxl_colo.h +++ b/tools/libxl/libxl_colo.h @@ -16,21 +16,42 @@ #ifndef LIBXL_COLO_H #define LIBXL_COLO_H +#include <linux/netlink.h> + struct libxl__ao; struct libxl__egc; struct libxl__colo_save_state; struct libxl__checkpoint_devices_state; +#define NETLINK_COLO 28 + enum { LIBXL_COLO_SETUPED, LIBXL_COLO_SUSPENDED, LIBXL_COLO_RESUMED, }; +enum colo_netlink_op { + COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), + COLO_CHECKPOINT, + COLO_FAILOVER, + COLO_PROXY_INIT, + COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */ +}; + typedef struct libxl__colo_qdisk { bool setuped; } libxl__colo_qdisk; +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state; +struct libxl__colo_proxy_state { + /* set by caller of colo_proxy_setup */ + struct libxl__ao *ao; + + int sock_fd; + int index; +}; + typedef struct libxl__domain_create_state libxl__domain_create_state; typedef void 
libxl__domain_create_cb(struct libxl__egc *egc, libxl__domain_create_state *dcs, @@ -58,6 +79,9 @@ struct libxl__colo_restore_state { bool qdisk_setuped; const char *host; const char *port; + + /* private, used by colo-proxy */ + libxl__colo_proxy_state cps; }; int init_subkind_qdisk(struct libxl__checkpoint_devices_state *cds); @@ -73,4 +97,7 @@ extern void libxl__colo_save_setup(struct libxl__egc *egc, extern void libxl__colo_save_teardown(struct libxl__egc *egc, struct libxl__colo_save_state *css, int rc); +extern int colo_proxy_setup(libxl__colo_proxy_state *cps); +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps); + #endif diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c new file mode 100644 index 0000000..2b3baa3 --- /dev/null +++ b/tools/libxl/libxl_colo_proxy.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Yang Hongyang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +/* ========= colo-proxy: helper functions ========== */ + +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, + uint64_t size, int type) +{ + struct sockaddr_nl sa; + struct nlmsghdr msg; + struct iovec iov; + struct msghdr mh; + int ret; + + STATE_AO_GC(cps->ao); + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + sa.nl_pid = 0; + sa.nl_groups = 0; + + msg.nlmsg_len = NLMSG_SPACE(0); + msg.nlmsg_flags = NLM_F_REQUEST; + if (type == COLO_PROXY_INIT) { + msg.nlmsg_flags |= NLM_F_ACK; + } + msg.nlmsg_seq = 0; + /* This is untrusty */ + msg.nlmsg_pid = cps->index; + msg.nlmsg_type = type; + + iov.iov_base = &msg; + iov.iov_len = msg.nlmsg_len; + + mh.msg_name = &sa; + mh.msg_namelen = sizeof(sa); + mh.msg_iov = &iov; + mh.msg_iovlen = 1; + mh.msg_control = NULL; + mh.msg_controllen = 0; + mh.msg_flags = 0; + + ret = sendmsg(cps->sock_fd, &mh, 0); + if (ret <= 0) { + LOG(ERROR, "can't send msg to kernel by netlink: %s", + strerror(errno)); + } + + return ret; +} + +/* error: return -1, otherwise return 0 */ +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, + unsigned int timeout_us) +{ + struct sockaddr_nl sa; + struct iovec iov; + struct msghdr mh = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + struct timeval tv; + uint32_t size = 16384; + int64_t len = 0; + int ret; + + STATE_AO_GC(cps->ao); + uint8_t *tmp = libxl__malloc(NOGC, size); + + if (timeout_us) { + tv.tv_sec = timeout_us / 1000000; + tv.tv_usec = timeout_us % 1000000; + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + } + + iov.iov_base = tmp; + iov.iov_len = size; +next: + ret = recvmsg(cps->sock_fd, &mh, 0); + if (ret <= 0) { + if (errno != EAGAIN && errno != EWOULDBLOCK) + LOGE(ERROR, "can't recv msg from kernel by netlink"); + goto err; + } + + len += ret; + if 
(mh.msg_flags & MSG_TRUNC) { + size += 16384; + tmp = libxl__realloc(NOGC, tmp, size); + iov.iov_base = tmp + len; + iov.iov_len = size - len; + goto next; + } + + *buff = tmp; + ret = len; + goto out; + +err: + free(tmp); + *buff = NULL; + +out: + if (timeout_us) { + tv.tv_sec = 0; + tv.tv_usec = 0; + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + } + return ret; +} + +/* ========= colo-proxy: setup and teardown ========== */ + +int colo_proxy_setup(libxl__colo_proxy_state *cps) +{ + int skfd = 0; + struct sockaddr_nl sa; + struct nlmsghdr *h; + int i = 1; + int ret = ERROR_FAIL; + uint8_t *buff = NULL; + int64_t size; + + STATE_AO_GC(cps->ao); + + skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO); + if (skfd < 0) { + LOG(ERROR, "can not create a netlink socket: %s", strerror(errno)); + goto out; + } + cps->sock_fd = skfd; + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + sa.nl_groups = 0; +retry: + sa.nl_pid = i++; + + if (i > 10) { + LOG(ERROR, "netlink bind error"); + goto out; + } + + ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa)); + if (ret < 0 && errno == EADDRINUSE) { + LOG(ERROR, "colo index %d has already in used", sa.nl_pid); + goto retry; + } else if (ret < 0) { + LOG(ERROR, "netlink bind error"); + goto out; + } + + cps->index = sa.nl_pid; + ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT); + if (ret < 0) { + goto out; + } + /* receive ack */ + size = colo_proxy_recv(cps, &buff, 500000); + if (size < 0) { + LOG(ERROR, "Can't recv msg from kernel by netlink: %s", + strerror(errno)); + goto out; + } + + if (size) { + h = (struct nlmsghdr *)buff; + if (h->nlmsg_type == NLMSG_ERROR) { + /* ack's type is NLMSG_ERROR */ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + + if (size - sizeof(*h) < sizeof(*err)) { + LOG(ERROR, "NLMSG_LENGTH is too short"); + goto out; + } + + if (err->error) { + LOG(ERROR, "NLMSG_ERROR contains error %d", err->error); + goto out; + } + } + } + + ret = 0; + +out: + 
free(buff); + if (ret) { + close(cps->sock_fd); + cps->sock_fd = -1; + } + return ret; +} + +void colo_proxy_teardown(libxl__colo_proxy_state *cps) +{ + if (cps->sock_fd >= 0) { + close(cps->sock_fd); + cps->sock_fd = -1; + } +} diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 0aeaf18..4940b59 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3194,6 +3194,9 @@ struct libxl__colo_save_state { /* private, used by qdisk block replication */ bool qdisk_used; bool qdisk_setuped; + + /* private, used by colo-proxy */ + libxl__colo_proxy_state cps; }; typedef struct libxl__logdirty_switch {