From patchwork Fri Sep 27 07:00:48 2019
From: Juergen Gross
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross, Tim Deegan, Stefano Stabellini, Wei Liu,
 Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
 Dario Faggioli, Julien Grall, Jan Beulich
Date: Fri, 27 Sep 2019 09:00:48 +0200
Message-Id: <20190927070050.12405-45-jgross@suse.com>
In-Reply-To: <20190927070050.12405-1-jgross@suse.com>
References: <20190927070050.12405-1-jgross@suse.com>
Subject: [Xen-devel] [PATCH v4 44/46] xen/sched: support core scheduling for moving cpus to/from cpupools

With core scheduling active it is necessary to move multiple cpus at the
same time to or from a cpupool, in order to avoid splitting a scheduling
resource between pools.
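Not part of the patch, but to illustrate the idea: with a scheduling granularity
larger than one cpu, pool membership has to change for the whole set of cpus
backing a scheduling resource at once, never for a single sibling. Below is a
minimal stand-alone C sketch of that rule, using a toy bitmask cpumask and a
hypothetical sibling_mask() helper standing in for sched_get_opt_cpumask():

/* Toy illustration only -- not Xen code. */
#include <stdio.h>

#define NR_CPUS 8
typedef unsigned int cpumask_t;            /* one bit per cpu */

/* Hypothetical helper: all cpus sharing the scheduling resource of 'cpu'. */
static cpumask_t sibling_mask(unsigned int cpu, unsigned int gran)
{
    unsigned int first = cpu - (cpu % gran);

    return ((1u << gran) - 1) << first;
}

static cpumask_t free_cpus = (1u << NR_CPUS) - 1;  /* all cpus start free */
static cpumask_t pool_cpus;                         /* cpus in the pool */

/* Move the whole sibling set of 'cpu' from the free set into the pool. */
static int pool_assign(unsigned int cpu, unsigned int gran)
{
    cpumask_t cpus = sibling_mask(cpu, gran);

    if ( (free_cpus & cpus) != cpus )      /* refuse split resources */
        return -1;
    free_cpus &= ~cpus;                    /* like cpumask_andnot() */
    pool_cpus |= cpus;                     /* like cpumask_or() */
    return 0;
}

int main(void)
{
    pool_assign(2, 2);                     /* core scheduling: gran = 2 */
    printf("pool=%#x free=%#x\n", pool_cpus, free_cpus);
    return 0;
}

The patch below applies the same rule with the real cpumask API:
cpumask_andnot()/cpumask_or() move whole sibling sets between
cpupool_free_cpus and c->cpu_valid, and a cpu is only accepted when
cpumask_subset(cpus, &cpupool_free_cpus) holds.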
Signed-off-by: Juergen Gross
Reviewed-by: Dario Faggioli
---
V1: new patch
---
 xen/common/cpupool.c       | 100 +++++++++++++++++++++++++++++++++------------
 xen/common/schedule.c      |   3 +-
 xen/include/xen/sched-if.h |   1 +
 3 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index 04c3b3c04b..f7a13c7a4c 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -268,23 +268,30 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
 {
     int ret;
     struct domain *d;
+    const cpumask_t *cpus;
+
+    cpus = sched_get_opt_cpumask(c->gran, cpu);
 
     if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
         return -EADDRNOTAVAIL;
-    ret = schedule_cpu_add(cpu, c);
+    ret = schedule_cpu_add(cpumask_first(cpus), c);
     if ( ret )
         return ret;
 
-    cpumask_clear_cpu(cpu, &cpupool_free_cpus);
+    rcu_read_lock(&sched_res_rculock);
+
+    cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
     if (cpupool_moving_cpu == cpu)
     {
         cpupool_moving_cpu = -1;
         cpupool_put(cpupool_cpu_moving);
         cpupool_cpu_moving = NULL;
     }
-    cpumask_set_cpu(cpu, c->cpu_valid);
+    cpumask_or(c->cpu_valid, c->cpu_valid, cpus);
     cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     rcu_read_lock(&domlist_read_lock);
     for_each_domain_in_cpupool(d, c)
     {
@@ -298,6 +305,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
 static int cpupool_unassign_cpu_finish(struct cpupool *c)
 {
     int cpu = cpupool_moving_cpu;
+    const cpumask_t *cpus;
     struct domain *d;
     int ret;
 
@@ -310,7 +318,10 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
      */
     rcu_read_lock(&domlist_read_lock);
     ret = cpu_disable_scheduler(cpu);
-    cpumask_set_cpu(cpu, &cpupool_free_cpus);
+
+    rcu_read_lock(&sched_res_rculock);
+    cpus = get_sched_res(cpu)->cpus;
+    cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
 
     /*
      * cpu_disable_scheduler() returning an error doesn't require resetting
@@ -323,7 +334,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
     {
         ret = schedule_cpu_rm(cpu);
         if ( ret )
-            cpumask_clear_cpu(cpu, &cpupool_free_cpus);
+            cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
         else
         {
             cpupool_moving_cpu = -1;
@@ -331,6 +342,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
             cpupool_cpu_moving = NULL;
         }
     }
+    rcu_read_unlock(&sched_res_rculock);
 
     for_each_domain_in_cpupool(d, c)
     {
@@ -345,6 +357,7 @@ static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
 {
     int ret;
     struct domain *d;
+    const cpumask_t *cpus;
 
     spin_lock(&cpupool_lock);
     ret = -EADDRNOTAVAIL;
@@ -353,7 +366,11 @@ static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
         goto out;
 
     ret = 0;
-    if ( (c->n_dom > 0) && (cpumask_weight(c->cpu_valid) == 1) &&
+    rcu_read_lock(&sched_res_rculock);
+    cpus = get_sched_res(cpu)->cpus;
+
+    if ( (c->n_dom > 0) &&
+         (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) &&
          (cpu != cpupool_moving_cpu) )
     {
         rcu_read_lock(&domlist_read_lock);
@@ -375,9 +392,10 @@ static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
     cpupool_moving_cpu = cpu;
     atomic_inc(&c->refcnt);
     cpupool_cpu_moving = c;
-    cpumask_clear_cpu(cpu, c->cpu_valid);
+    cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus);
     cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
 
+    rcu_read_unlock(&domlist_read_lock);
  out:
     spin_unlock(&cpupool_lock);
 
@@ -417,11 +435,13 @@ static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
 {
     int work_cpu;
     int ret;
+    unsigned int master_cpu;
 
     debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
                       c->cpupool_id, cpu);
 
-    ret = cpupool_unassign_cpu_start(c, cpu);
+    master_cpu = sched_get_resource_cpu(cpu);
+    ret = cpupool_unassign_cpu_start(c, master_cpu);
     if ( ret )
     {
         debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
@@ -429,12 +449,12 @@ static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
         return ret;
     }
 
-    work_cpu = smp_processor_id();
-    if ( work_cpu == cpu )
+    work_cpu = sched_get_resource_cpu(smp_processor_id());
+    if ( work_cpu == master_cpu )
     {
         work_cpu = cpumask_first(cpupool0->cpu_valid);
-        if ( work_cpu == cpu )
-            work_cpu = cpumask_next(cpu, cpupool0->cpu_valid);
+        if ( work_cpu == master_cpu )
+            work_cpu = cpumask_last(cpupool0->cpu_valid);
     }
     return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
 }
@@ -500,6 +520,7 @@ void cpupool_rm_domain(struct domain *d)
 static int cpupool_cpu_add(unsigned int cpu)
 {
     int ret = 0;
+    const cpumask_t *cpus;
 
     spin_lock(&cpupool_lock);
     cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
@@ -513,7 +534,11 @@ static int cpupool_cpu_add(unsigned int cpu)
      */
     rcu_read_lock(&sched_res_rculock);
     get_sched_res(cpu)->cpupool = NULL;
-    ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+
+    cpus = sched_get_opt_cpumask(cpupool0->gran, cpu);
+    if ( cpumask_subset(cpus, &cpupool_free_cpus) )
+        ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+
     rcu_read_unlock(&sched_res_rculock);
 
     spin_unlock(&cpupool_lock);
@@ -548,27 +573,33 @@ static void cpupool_cpu_remove(unsigned int cpu)
 static int cpupool_cpu_remove_prologue(unsigned int cpu)
 {
     int ret = 0;
+    cpumask_t *cpus;
+    unsigned int master_cpu;
 
     spin_lock(&cpupool_lock);
-    if ( cpumask_test_cpu(cpu, &cpupool_locked_cpus) )
+
+    rcu_read_lock(&sched_res_rculock);
+    cpus = get_sched_res(cpu)->cpus;
+    master_cpu = sched_get_resource_cpu(cpu);
+    if ( cpumask_intersects(cpus, &cpupool_locked_cpus) )
         ret = -EBUSY;
     else
         cpumask_set_cpu(cpu, &cpupool_locked_cpus);
+    rcu_read_unlock(&sched_res_rculock);
+
     spin_unlock(&cpupool_lock);
 
     if ( ret )
         return ret;
 
-    if ( cpumask_test_cpu(cpu, cpupool0->cpu_valid) )
+    if ( cpumask_test_cpu(master_cpu, cpupool0->cpu_valid) )
     {
         /* Cpupool0 is populated only after all cpus are up. */
         ASSERT(system_state == SYS_STATE_active);
-        ret = cpupool_unassign_cpu_start(cpupool0, cpu);
+        ret = cpupool_unassign_cpu_start(cpupool0, master_cpu);
     }
-    else if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
+    else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) )
         ret = -ENODEV;
 
     return ret;
@@ -585,12 +616,13 @@ static void cpupool_cpu_remove_forced(unsigned int cpu)
 {
     struct cpupool **c;
     int ret;
+    unsigned int master_cpu = sched_get_resource_cpu(cpu);
 
     for_each_cpupool ( c )
     {
-        if ( cpumask_test_cpu(cpu, (*c)->cpu_valid) )
+        if ( cpumask_test_cpu(master_cpu, (*c)->cpu_valid) )
         {
-            ret = cpupool_unassign_cpu_start(*c, cpu);
+            ret = cpupool_unassign_cpu_start(*c, master_cpu);
             BUG_ON(ret);
             ret = cpupool_unassign_cpu_finish(*c);
             BUG_ON(ret);
@@ -658,29 +690,45 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
     case XEN_SYSCTL_CPUPOOL_OP_ADDCPU:
     {
         unsigned cpu;
+        const cpumask_t *cpus;
 
         cpu = op->cpu;
         debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d)\n",
                           op->cpupool_id, cpu);
+
         spin_lock(&cpupool_lock);
+
+        c = cpupool_find_by_id(op->cpupool_id);
+        ret = -ENOENT;
+        if ( c == NULL )
+            goto addcpu_out;
         if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
-            cpu = cpumask_first(&cpupool_free_cpus);
+        {
+            for_each_cpu ( cpu, &cpupool_free_cpus )
+            {
+                cpus = sched_get_opt_cpumask(c->gran, cpu);
+                if ( cpumask_subset(cpus, &cpupool_free_cpus) )
+                    break;
+            }
+            ret = -ENODEV;
+            if ( cpu >= nr_cpu_ids )
+                goto addcpu_out;
+        }
         ret = -EINVAL;
         if ( cpu >= nr_cpu_ids )
             goto addcpu_out;
         ret = -ENODEV;
-        if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) ||
-             cpumask_test_cpu(cpu, &cpupool_locked_cpus) )
-            goto addcpu_out;
-        c = cpupool_find_by_id(op->cpupool_id);
-        ret = -ENOENT;
-        if ( c == NULL )
+        cpus = sched_get_opt_cpumask(c->gran, cpu);
+        if ( !cpumask_subset(cpus, &cpupool_free_cpus) ||
+             cpumask_intersects(cpus, &cpupool_locked_cpus) )
             goto addcpu_out;
         ret = cpupool_assign_cpu_locked(c, cpu);
+
     addcpu_out:
         spin_unlock(&cpupool_lock);
         debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
                           op->cpupool_id, cpu, ret);
+
     }
     break;
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 89238f801d..d2133558c8 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -2591,8 +2591,7 @@ static struct notifier_block cpu_schedule_nfb = {
     .notifier_call = cpu_schedule_callback
 };
 
-static const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt,
-                                              unsigned int cpu)
+const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu)
 {
     const cpumask_t *mask;
 
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 780735dda3..cd731d7172 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -638,5 +638,6 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step,
 }
 
 void sched_rm_cpu(unsigned int cpu);
+const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
 
 #endif /* __XEN_SCHED_IF_H__ */