[2/2] credit: Don't steal vcpus which have yielded

Message ID 20230630113756.672607-2-george.dunlap@cloud.com
State New, archived
Series [1/2] credit: Limit load balancing to once per millisecond

Commit Message

George Dunlap June 30, 2023, 11:37 a.m. UTC
On large systems with many vcpus yielding due to spinlock priority
inversion, it's not uncommon for a vcpu to yield its timeslice, only
to be immediately stolen by another pcpu looking for higher-priority
work.
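
For illustration: a guest gives up its timeslice with the SCHEDOP_yield
hypercall, which is what ends up setting the scheduler's YIELD flag on
the vcpu.  A minimal guest-side sketch, assuming the standard Xen guest
hypercall wrappers (lock_wait_slowpath() is a hypothetical name):

    #include <xen/interface/sched.h>    /* SCHEDOP_yield */

    /*
     * Hypothetical pv-aware lock slow path: the lock holder looks
     * preempted, so give up this vcpu's timeslice rather than spin.
     */
    static void lock_wait_slowpath(void)
    {
        HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
    }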

To prevent this:

* Keep the YIELD flag until a vcpu is removed from a runqueue

* When looking for work to steal, skip vcpus which have yielded

NB that this does mean that sometimes a vcpu is inserted into an empty
runqueue; handle that case.
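
A simplified sketch of the new stealing rule, using the names from the
patch below (the real logic lives in csched_runq_steal()):

    list_for_each( iter, &peer_pcpu->runq )
    {
        struct csched_unit *speer = __runq_elem(iter);

        /* The runq is priority-ordered; nothing past here is worth stealing. */
        if ( speer->pri <= pri )
            break;

        /* Skip units which gave up their timeslice on purpose. */
        if ( test_bit(CSCHED_FLAG_UNIT_YIELD, &speer->flags) )
            continue;

        /* ... affinity checks, then dequeue and migrate the unit ... */
    }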

Signed-off-by: George Dunlap <george.dunlap@cloud.com>
---
CC: Dario Faggioli <dfaggioli@suse.com>
---
 xen/common/sched/credit.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

Patch

diff --git a/xen/common/sched/credit.c b/xen/common/sched/credit.c
index b8bdfd5f6a..7754e9b3a0 100644
--- a/xen/common/sched/credit.c
+++ b/xen/common/sched/credit.c
@@ -296,14 +296,10 @@  __runq_insert(struct csched_unit *svc)
      * runnable unit if we can.  The next runq_sort will bring it forward
      * within 30ms if the queue too long. */
     if ( test_bit(CSCHED_FLAG_UNIT_YIELD, &svc->flags)
-         && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
-    {
+         && __runq_elem(iter)->pri > CSCHED_PRI_IDLE
+         && iter->next != runq )
         iter=iter->next;
 
-        /* Some sanity checks */
-        BUG_ON(iter == runq);
-    }
-
     list_add_tail(&svc->runq_elem, iter);
 }
 
@@ -319,6 +315,11 @@  __runq_remove(struct csched_unit *svc)
 {
     BUG_ON( !__unit_on_runq(svc) );
     list_del_init(&svc->runq_elem);
+
+    /*
+     * Clear the YIELD flag when the unit comes off the runqueue.
+     */
+    clear_bit(CSCHED_FLAG_UNIT_YIELD, &svc->flags);
 }
 
 static inline void
@@ -1638,6 +1639,13 @@  csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step)
         if ( speer->pri <= pri )
             break;
 
+        /*
+         * Don't steal a UNIT which has yielded; it gave up its
+         * timeslice for a reason.
+         */
+        if ( test_bit(CSCHED_FLAG_UNIT_YIELD, &speer->flags) )
+            continue;
+
         /* Is this UNIT runnable on our PCPU? */
         unit = speer->unit;
         BUG_ON( is_idle_unit(unit) );
@@ -1955,11 +1963,6 @@  static void cf_check csched_schedule(
         dec_nr_runnable(sched_cpu);
     }
 
-    /*
-     * Clear YIELD flag before scheduling out
-     */
-    clear_bit(CSCHED_FLAG_UNIT_YIELD, &scurr->flags);
-
     do {
         snext = __runq_elem(runq->next);
 
@@ -1974,10 +1977,11 @@  static void cf_check csched_schedule(
         /*
          * SMP Load balance:
          *
-         * If the next highest priority local runnable UNIT has already eaten
-         * through its credits, look on other PCPUs to see if we have more
-         * urgent work... If not, csched_load_balance() will return snext, but
-         * already removed from the runq.
+         * If the next highest priority local runnable UNIT has
+         * already eaten through its credits (and we haven't balanced
+         * within the ratelimit interval), look on other PCPUs for
+         * more urgent work... If there is none, csched_load_balance()
+         * will return snext, already removed from the runq.
          */
         if ( snext->pri <= CSCHED_PRI_TS_OVER
              && now - spc->last_load_balance > prv->load_balance_ratelimit) {