diff mbox

[v2,2/2] tools: utility to dump guest grant table info

Message ID 1499038453-17050-2-git-send-email-dongli.zhang@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dongli Zhang July 2, 2017, 11:34 p.m. UTC
As both xen-netfront and xen-blkfront support multi-queue, they can
consume a lot of grant table references when many paravirtual devices
and vcpus are assigned to a guest. A domU guest might panic or hang due to
grant allocation failure once nr_grant_frames in the guest has reached its
maximum value.

This utility helps administrators diagnose Xen issues. So far there is
only one command, gnttab_query_size, which monitors a guest's grant table
frame usage from the dom0 side, so that it is no longer necessary to debug
on the guest kernel side for crash/hang analysis.

It can be extended with new commands for further diagnostic functions; the
framework of xen-diag.c is taken from xen-livepatch.c.

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
Changed since v1:
  * rewrite xen-gnttab-query.c to xen-diag.c based on livepatch.c framework

---
 tools/misc/Makefile   |   4 ++
 tools/misc/xen-diag.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 tools/misc/xen-diag.c

Comments

Wei Liu July 3, 2017, 4:26 p.m. UTC | #1
On Mon, Jul 03, 2017 at 07:34:13AM +0800, Dongli Zhang wrote:
> As both xen-netfront and xen-blkfront support multi-queue, they would
> consume a lot of grant table references when there are many paravirtual
> devices and vcpus assigned to guest. Guest domU might panic or hang due to
> grant allocation failure when nr_grant_frames in guest has reached its max
> value.
> 
> This utility would help the administrators to diagnose xen issue. There is
> only one command gnttab_query_size so far to monitor the guest grant table
> frame usage on dom0 side so that it is not required to debug on guest
> kernel side for crash/hang analysis anymore.
> 
> It is extensible for adding new commands for more diagnostic functions and
> the framework of xen-diag.c is from xen-livepatch.c.
> 
> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>

Acked-by: Wei Liu <wei.liu2@citrix.com>
Wei Liu July 4, 2017, 2:10 p.m. UTC | #2
I pushed your two patches and discovered you also need to patch
.gitignore. Could you please send a patch for that? thanks
Dongli Zhang July 4, 2017, 2:27 p.m. UTC | #3
Hi Wei,

I will send the patch based on staging.

Dongli Zhang

On 07/04/2017 10:10 PM, Wei Liu wrote:
> I pushed your two patches and discovered you also need to patch
> .gitignore. Could you please send a patch for that? thanks
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
>
diff mbox

Patch

diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 8152f7b..c1113b9 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -31,6 +31,7 @@  INSTALL_SBIN                   += xenperf
 INSTALL_SBIN                   += xenpm
 INSTALL_SBIN                   += xenwatchdogd
 INSTALL_SBIN                   += xen-livepatch
+INSTALL_SBIN                   += xen-diag
 INSTALL_SBIN += $(INSTALL_SBIN-y)
 
 # Everything to be installed in a private bin/
@@ -102,6 +103,9 @@  xenwatchdogd: xenwatchdogd.o
 xen-livepatch: xen-livepatch.o
 	$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
 
+xen-diag: xen-diag.o
+	$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
+
 xen-lowmemd: xen-lowmemd.o
 	$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenevtchn) $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
diff --git a/tools/misc/xen-diag.c b/tools/misc/xen-diag.c
new file mode 100644
index 0000000..1da50e1
--- /dev/null
+++ b/tools/misc/xen-diag.c
@@ -0,0 +1,129 @@ 
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <err.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <xenctrl.h>
+
+#include <xen/errno.h>
+#include <xen-tools/libs.h>
+
+/* libxenctrl handle: opened in main() and used by the command handlers. */
+static xc_interface *xch;
+
+/* Number of elements in array a; a must be a real array, not a pointer. */
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/*
+ * Print the usage summary and the list of supported commands to stderr.
+ *
+ * The function is only called from within this file, so it is given
+ * internal linkage rather than being exported as a global symbol.
+ */
+static void show_help(void)
+{
+    fprintf(stderr,
+            "xen-diag: xen diagnostic utility\n"
+            "Usage: xen-diag command [args]\n"
+            "Commands:\n"
+            "  help                       display this help\n"
+            "  gnttab_query_size <domid>  dump the current and max grant frames for <domid>\n");
+}
+
+/*
+ * Handler for the "help" command.  The arguments are ignored; the usage
+ * text is printed through show_help() and success (0) is returned.
+ */
+static int help_func(int argc, char *argv[])
+{
+    (void)argc;
+    (void)argv;
+
+    show_help();
+
+    return 0;
+}
+
+/*
+ * Handler for "gnttab_query_size <domid>": print the current and maximum
+ * number of grant table frames of one domain.
+ *
+ * argc/argv hold only the arguments that follow the command name, so
+ * exactly one argument (the decimal domain ID) is expected.
+ *
+ * Returns 0 on success.  Returns 1 on a usage error, on a domain ID that
+ * is not a valid number, when xc_gnttab_query_size() fails, or when the
+ * query completes with a status other than GNTST_okay.
+ */
+static int gnttab_query_size_func(int argc, char *argv[])
+{
+    struct gnttab_query_size query;
+    char *end;
+    long value;
+    int rc;
+
+    if ( argc != 1 )
+    {
+        show_help();
+        return 1;
+    }
+
+    /*
+     * Refuse empty, non-numeric, negative and out-of-range IDs instead of
+     * silently querying whatever domain the truncated value maps to.  The
+     * round trip through query.dom rejects values the field cannot hold.
+     */
+    errno = 0;
+    value = strtol(argv[0], &end, 10);
+    query.dom = value;
+    if ( errno || end == argv[0] || *end != '\0' ||
+         value < 0 || query.dom != value )
+    {
+        fprintf(stderr, "invalid domid '%s'\n", argv[0]);
+        return 1;
+    }
+
+    rc = xc_gnttab_query_size(xch, &query);
+    if ( rc != 0 )
+    {
+        /* The output fields are not inspected when the call itself failed. */
+        fprintf(stderr, "failed to query grant table size of domid %ld (rc=%d)\n",
+                value, rc);
+        return 1;
+    }
+
+    if ( query.status != GNTST_okay )
+    {
+        fprintf(stderr, "grant table size query for domid %ld returned status %d\n",
+                value, (int)query.status);
+        return 1;
+    }
+
+    /* The frame counters are unsigned in the grant table ABI, hence %u. */
+    printf("domid=%d: nr_frames=%u, max_nr_frames=%u\n",
+           query.dom, query.nr_frames, query.max_nr_frames);
+
+    return 0;
+}
+
+/*
+ * Command dispatch table: maps a command name given on the command line to
+ * the function implementing it.  main() calls the handler with only the
+ * arguments that follow the command name and treats a return value of 0 as
+ * success.
+ *
+ * The table is never modified and is only used in this file, so it is
+ * declared static const.
+ */
+static const struct {
+    const char *name;
+    int (*function)(int argc, char *argv[]);
+} main_options[] = {
+    { "help", help_func },
+    { "gnttab_query_size", gnttab_query_size_func },
+};
+
+/*
+ * Entry point: look up argv[1] in main_options and run the matching
+ * command with the remaining arguments.
+ *
+ * Exit status:
+ *   0 - success (also when invoked without a command, which prints help)
+ *   1 - error, including an unknown command or a failure to open the
+ *       libxenctrl interface
+ *   2 - the operation should be retried (handler returned EAGAIN or EBUSY)
+ */
+int main(int argc, char *argv[])
+{
+    int ret;
+    size_t i;
+
+    /*
+     * Set stdout to be unbuffered to avoid having to fflush when
+     * printing without a newline.
+     */
+    setvbuf(stdout, NULL, _IONBF, 0);
+
+    if ( argc <= 1 )
+    {
+        show_help();
+        return 0;
+    }
+
+    /*
+     * Command names must match exactly.  A comparison limited to the length
+     * of argv[1] would let an empty or abbreviated argument select the
+     * first table entry it happens to be a prefix of.
+     */
+    for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
+        if ( !strcmp(main_options[i].name, argv[1]) )
+            break;
+
+    if ( i == ARRAY_SIZE(main_options) )
+    {
+        /* An unknown command is an error, so do not exit with status 0. */
+        fprintf(stderr, "unknown command '%s'\n", argv[1]);
+        show_help();
+        return EXIT_FAILURE;
+    }
+
+    xch = xc_interface_open(NULL, NULL, 0);
+    if ( !xch )
+    {
+        fprintf(stderr, "failed to get the handler\n");
+        return EXIT_FAILURE;
+    }
+
+    /* Hand the handler only the arguments after the command name. */
+    ret = main_options[i].function(argc - 2, argv + 2);
+
+    xc_interface_close(xch);
+
+    /*
+     * Exitcode 0 for success.
+     * Exitcode 1 for an error.
+     * Exitcode 2 if the operation should be retried for any reason (e.g. a
+     * timeout or because another operation was in progress).
+     */
+
+#define EXIT_TIMEOUT (EXIT_FAILURE + 1)
+
+    BUILD_BUG_ON(EXIT_SUCCESS != 0);
+    BUILD_BUG_ON(EXIT_FAILURE != 1);
+    BUILD_BUG_ON(EXIT_TIMEOUT != 2);
+
+    switch ( ret )
+    {
+    case 0:
+        return EXIT_SUCCESS;
+    case EAGAIN:
+    case EBUSY:
+        return EXIT_TIMEOUT;
+    default:
+        return EXIT_FAILURE;
+    }
+}