Message ID | 1457141967-3340-1-git-send-email-tiche@seas.upenn.edu (mailing list archive)
---|---
State | New, archived
Hi Tianyang, Thank you very much for this version of the patch. It is in a really good state, IMHO, although it does have a suspicious logic error inside, which can be corrected easily. I didn't mark out all of the repeated style issues; I think you can correct all of the style issues, such as the spaces in the code, in the next version.

Hi Dario, Could you help check whether my two concerns in the comments below make sense? One is small, about whether we should use ( ) to make the code clearer. The other is about the logic in the replenishment handler.

On Fri, Mar 4, 2016 at 8:39 PM, Tianyang Chen <tiche@seas.upenn.edu> wrote: > Budget replenishment and enforcement are separated by adding > a replenishment timer, which fires at the next most imminent > release time of all runnable vcpus. > > A replenishment queue has been added to keep track of all replenishment > events. > > The following functions have major changes to manage the replenishment > events and timer. > > repl_handler(): It is a timer handler which is re-programmed > to fire at the nearest vcpu deadline to replenish vcpus. > > rt_schedule(): picks the highest-priority runnable vcpu based on cpu > affinity, and ret.time will be passed to schedule(). If an idle > vcpu is picked, -1 is returned to avoid busy-waiting. repl_update() > has been removed. > > rt_vcpu_wake(): when a vcpu wakes up, it tickles instead of > picking one from the run queue. > > rt_context_saved(): when context switching is finished, the > preempted vcpu will be put back into the run queue and it tickles.
>
> Simplified functional graph:
>
> schedule.c SCHEDULE_SOFTIRQ:
>     rt_schedule():
>         [spin_lock]
>         burn_budget(scurr)
>         snext = runq_pick()
>         [spin_unlock]
>
> sched_rt.c TIMER_SOFTIRQ
>     replenishment_timer_handler()
>         [spin_lock]
>         <for_each_vcpu_on_q(i)> {
>             replenish(i)
>             runq_tickle(i)
>         }>
>         program_timer()
>         [spin_unlock]
>
> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
>
> ---
> changes since v6:
>     Removed unnecessary vcpu_on_q() checks for runq_insert()
>     Renamed replenishment queue related functions without underscores
>     New replq_reinsert() function for rt_vcpu_wake()
>
> changes since v5:
>     Removed unnecessary vcpu_on_replq() checks
>     deadline_queue_insert() returns a flag to indicate whether the timer needs to be re-programmed
>     Removed unnecessary timer checks
>     Added helper functions to remove vcpus from queues
>     coding style
>
> Changes since v4:
>     removed unnecessary replenishment queue checks in vcpu_wake()
>     extended replq_remove() to all cases in vcpu_sleep()
>     used _deadline_queue_insert() helper function for both queues
>     _replq_insert() and _replq_remove() program timer internally
>
> Changes since v3:
>     removed running queue.
>     added repl queue to keep track of repl events.
>     timer is now per scheduler.
>     timer is init on a valid cpu in a cpupool.
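For reference, the rt_schedule() change summarized above boils down to roughly the following shape (a sketch of the tail of the function; the full hunk appears in the diff below):

    snext->last_start = now;
    ret.time = -1; /* if an idle vcpu is picked, avoid a busy-wait wakeup */
    if ( !is_idle_vcpu(snext->vcpu) )
        ret.time = snext->budget; /* invoke the scheduler when the budget runs out */
    ret.task = snext->vcpu;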
> --- > xen/common/sched_rt.c | 381 +++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 286 insertions(+), 95 deletions(-) > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > index bfed2e2..06803be 100644 > --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -16,6 +16,7 @@ > #include <xen/delay.h> > #include <xen/event.h> > #include <xen/time.h> > +#include <xen/timer.h> > #include <xen/perfc.h> > #include <xen/sched-if.h> > #include <xen/softirq.h> > @@ -87,7 +88,7 @@ > #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) > > #define UPDATE_LIMIT_SHIFT 10 > -#define MAX_SCHEDULE (MILLISECS(1)) > + > /* > * Flags > */ > @@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch; > */ > static unsigned int nr_rt_ops; > > +/* handler for the replenishment timer */ > +static void repl_handler(void *data); > + > /* > * Systme-wide private data, include global RunQueue/DepletedQ > * Global lock is referenced by schedule_data.schedule_lock from all > @@ -152,7 +156,9 @@ struct rt_private { > struct list_head sdom; /* list of availalbe domains, used for dump */ > struct list_head runq; /* ordered list of runnable vcpus */ > struct list_head depletedq; /* unordered list of depleted vcpus */ > + struct list_head replq; /* ordered list of vcpus that need replenishment */ > cpumask_t tickled; /* cpus been tickled */ > + struct timer *repl_timer; /* replenishment timer */ > }; > > /* > @@ -160,6 +166,7 @@ struct rt_private { > */ > struct rt_vcpu { > struct list_head q_elem; /* on the runq/depletedq list */ > + struct list_head replq_elem;/* on the repl event list */ > > /* Up-pointers */ > struct rt_dom *sdom; > @@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) > return &rt_priv(ops)->depletedq; > } > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > +{ > + return &rt_priv(ops)->replq; > +} > + > /* > - * Queue helper functions for runq and depletedq > + * Helper functions for manipulating the runqueue, the depleted queue, > + * and the replenishment events queue. 
*/ > static int > __vcpu_on_q(const struct rt_vcpu *svc) > @@ -228,6 +241,18 @@ __q_elem(struct list_head *elem) > return list_entry(elem, struct rt_vcpu, q_elem); > } > > +static struct rt_vcpu * > +replq_elem(struct list_head *elem) > +{ > + return list_entry(elem, struct rt_vcpu, replq_elem); > +} > + > +static int > +vcpu_on_replq(const struct rt_vcpu *svc) > +{ > + return !list_empty(&svc->replq_elem); > +} > + > /* > * Debug related code, dump vcpu/cpu information > */ > @@ -288,7 +313,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > static void > rt_dump(const struct scheduler *ops) > { > - struct list_head *runq, *depletedq, *iter; > + struct list_head *runq, *depletedq, *replq, *iter; > struct rt_private *prv = rt_priv(ops); > struct rt_vcpu *svc; > struct rt_dom *sdom; > @@ -301,6 +326,7 @@ rt_dump(const struct scheduler *ops) > > runq = rt_runq(ops); > depletedq = rt_depletedq(ops); > + replq = rt_replq(ops); > > printk("Global RunQueue info:\n"); > list_for_each( iter, runq ) > @@ -316,6 +342,13 @@ rt_dump(const struct scheduler *ops) > rt_dump_vcpu(ops, svc); > } > > + printk("Global Replenishment Event info:\n"); > + list_for_each( iter, replq ) > + { > + svc = replq_elem(iter); > + rt_dump_vcpu(ops, svc); > + } > + > printk("Domain info:\n"); > list_for_each( iter, &prv->sdom ) > { > @@ -380,11 +413,76 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc) > return; > } > > +/* > + * A helper function that only removes a vcpu from a queue > + * and it returns 1 if the vcpu was in front of the list. > + */ > +static inline int > +deadline_queue_remove(struct list_head *queue, struct list_head *elem) > +{ > + int pos = 0; > + if( queue->next != elem )

Style: missing space after "if".

> + pos = 1; > + list_del_init(elem); > + return !pos; > +} > + > static inline void > __q_remove(struct rt_vcpu *svc) > { > - if ( __vcpu_on_q(svc) ) > - list_del_init(&svc->q_elem); > + ASSERT( __vcpu_on_q(svc) ); > + list_del_init(&svc->q_elem); > +} > + > +/* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if it was at the front of the list. > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + ASSERT( vcpu_on_replq(svc) ); > + > + if( deadline_queue_remove(replq,&svc->replq_elem) )

Style: space after "if", and space before "&"; please correct all of the space issues.

> + { > + /* re-arm the timer for the next replenishment event */ > + if( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + else > + stop_timer(repl_timer); > + } > +} > + > +/* > + * A utility function that inserts a vcpu into a > + * queue based on a certain order (EDF). The returned > + * value is 1 if a vcpu has been inserted at the > + * front of a list.
> + */ > +static int > +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), > + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) > +{ > + struct list_head *iter; > + int pos = 0; > + > + list_for_each(iter, queue) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > + pos++; > + } > + list_add_tail(elem, iter); > + return !pos; > } > > /* > @@ -397,27 +495,62 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > { > struct rt_private *prv = rt_priv(ops); > struct list_head *runq = rt_runq(ops); > - struct list_head *iter; > > ASSERT( spin_is_locked(&prv->lock) ); > - > ASSERT( !__vcpu_on_q(svc) ); > + ASSERT( vcpu_on_replq(svc) ); > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > +} > + > +/* > + * Insert svc into the replenishment event list > + * in replenishment time order. > + * vcpus that need to be replished earlier go first. > + * The timer may be re-programmed if svc is inserted > + * at the front of the event list. > + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !vcpu_on_replq(svc) ); > + > + if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer,svc->cur_deadline); > +} > + > +/* > + * Removes and re-inserts an event to the replenishment queue. > + */ > +static void > +replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( vcpu_on_replq(svc) ); > + > + if( deadline_queue_remove(replq,&svc->replq_elem) ) > + { > + if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer,svc->cur_deadline); > + else > + { > + struct rt_vcpu *svc_next = replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > } > + else if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer,svc->cur_deadline); > } > > /* > @@ -449,11 +582,18 @@ rt_init(struct scheduler *ops) > INIT_LIST_HEAD(&prv->sdom); > INIT_LIST_HEAD(&prv->runq); > INIT_LIST_HEAD(&prv->depletedq); > + INIT_LIST_HEAD(&prv->replq); > > cpumask_clear(&prv->tickled); > > ops->sched_data = prv; > > + /* > + * The timer initialization will happen later when > + * the first pcpu is added to this pool in alloc_pdata. 
> + */ > + prv->repl_timer = NULL; > + > return 0; > > no_mem: > @@ -473,6 +613,10 @@ rt_deinit(struct scheduler *ops) > xfree(_cpumask_scratch); > _cpumask_scratch = NULL; > } > + > + kill_timer(prv->repl_timer); > + xfree(prv->repl_timer); > + > ops->sched_data = NULL; > xfree(prv); > } > @@ -494,6 +638,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) > if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) > return NULL; > > + if( prv->repl_timer == NULL ) > + { > + /* allocate the timer on the first cpu of this pool */ > + prv->repl_timer = xzalloc(struct timer); > + > + if(prv->repl_timer == NULL ) > + return NULL; > + > + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); > + } > + > /* 1 indicates alloc. succeed in schedule.c */ > return (void *)1; > } > @@ -587,6 +742,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) > return NULL; > > INIT_LIST_HEAD(&svc->q_elem); > + INIT_LIST_HEAD(&svc->replq_elem); > svc->flags = 0U; > svc->sdom = dd; > svc->vcpu = vc; > @@ -610,7 +766,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) > } > > /* > - * This function is called in sched_move_domain() in schedule.c > + * It is called in sched_move_domain() and sched_init_vcpu > + * in schedule.c > * When move a domain to a new cpupool. > * It inserts vcpus of moving domain to the scheduler's RunQ in > * dest. cpupool. > @@ -628,8 +785,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) > if ( now >= svc->cur_deadline ) > rt_update_deadline(now, svc); > > - if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running ) > - __runq_insert(ops, svc); > + if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) ) > + { > + __replq_insert(ops, svc); > + > + if( !vc->is_running) > + __runq_insert(ops, svc); > + } > vcpu_schedule_unlock_irq(lock, vc); > > SCHED_STAT_CRANK(vcpu_insert); > @@ -652,6 +814,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if( vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + > vcpu_schedule_unlock_irq(lock, vc); > } > > @@ -784,44 +950,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) > } > > /* > - * Update vcpu's budget and > - * sort runq by insert the modifed vcpu back to runq > - * lock is grabbed before calling this function > - */ > -static void > -__repl_update(const struct scheduler *ops, s_time_t now) > -{ > - struct list_head *runq = rt_runq(ops); > - struct list_head *depletedq = rt_depletedq(ops); > - struct list_head *iter; > - struct list_head *tmp; > - struct rt_vcpu *svc = NULL; > - > - list_for_each_safe(iter, tmp, runq) > - { > - svc = __q_elem(iter); > - if ( now < svc->cur_deadline ) > - break; > - > - rt_update_deadline(now, svc); > - /* reinsert the vcpu if its deadline is updated */ > - __q_remove(svc); > - __runq_insert(ops, svc); > - } > - > - list_for_each_safe(iter, tmp, depletedq) > - { > - svc = __q_elem(iter); > - if ( now >= svc->cur_deadline ) > - { > - rt_update_deadline(now, svc); > - __q_remove(svc); /* remove from depleted queue */ > - __runq_insert(ops, svc); /* add to runq */ > - } > - } > -} > - > -/* > * schedule function for rt scheduler. 
> * The lock is already grabbed in schedule.c, no need to lock here > */ > @@ -840,8 +968,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > /* burn_budget would return for IDLE VCPU */ > burn_budget(ops, scurr, now); > > - __repl_update(ops, now); > - > if ( tasklet_work_scheduled ) > { > trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL); > @@ -868,6 +994,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > set_bit(__RTDS_delayed_runq_add, &scurr->flags); > > snext->last_start = now; > + ret.time = -1; /* if an idle vcpu is picked */ > if ( !is_idle_vcpu(snext->vcpu) ) > { > if ( snext != scurr ) > @@ -880,9 +1007,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > snext->vcpu->processor = cpu; > ret.migrated = 1; > } > + ret.time = snext->budget; /* invoke the scheduler next time */ > } > - > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > ret.task = snext->vcpu; > > return ret; > @@ -903,7 +1029,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > if ( curr_on_cpu(vc->processor) == vc ) > cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); > else if ( __vcpu_on_q(svc) ) > + { > __q_remove(svc); > + __replq_remove(ops, svc); > + } > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > } > @@ -1008,10 +1137,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > s_time_t now = NOW(); > - struct rt_private *prv = rt_priv(ops); > - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ > - struct rt_dom *sdom = NULL; > - cpumask_t *online; > + bool_t missed; > > BUG_ON( is_idle_vcpu(vc) ); > > @@ -1033,32 +1159,40 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > - /* If context hasn't been saved for this vcpu yet, we can't put it on > - * the Runqueue/DepletedQ. Instead, we set a flag so that it will be > - * put on the Runqueue/DepletedQ after the context has been saved. > + /* > + * If a deadline passed while svc was asleep/blocked, we need new > + * scheduling parameters (a new deadline and full budget). > + */ > + if ( (missed = now >= svc->cur_deadline) ) Dario, Is it better to have a () outside of now >= svc->cur_deadline to make the logic clearer, instead of relying on the operator's priority? > + rt_update_deadline(now, svc); > + > + /* > + * If context hasn't been saved for this vcpu yet, we can't put it on > + * the run-queue/depleted-queue. Instead, we set the appropriate flag, > + * the vcpu will be put back on queue after the context has been saved > + * (in rt_context_save()). > */ > if ( unlikely(svc->flags & RTDS_scheduled) ) > { > - set_bit(__RTDS_delayed_runq_add, &svc->flags); > + set_bit(__RTDS_delayed_runq_add, &svc->flags); > + /* > + * The vcpu is waking up already, and we didn't even had the time to > + * remove its next replenishment event from the replenishment queue > + * when it blocked! No big deal. If we did not miss the deadline in > + * the meantime, let's just leave it there. If we did, let's remove it > + * and queue a new one (to occur at our new deadline). > + */ > + if ( missed ) > + replq_reinsert(ops, svc); > return; > } > > - if ( now >= svc->cur_deadline) > - rt_update_deadline(now, svc); > - > + /* Replenishment event got cancelled when we blocked. Add it back. 
*/ > + __replq_insert(ops, svc); > /* insert svc to runq/depletedq because svc is not in queue now */ > __runq_insert(ops, svc); > > - __repl_update(ops, now); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ > - > - runq_tickle(ops, snext); > - > - return; > + runq_tickle(ops, svc); > } > > /* > @@ -1069,10 +1203,6 @@ static void > rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu *svc = rt_vcpu(vc); > - struct rt_vcpu *snext = NULL; > - struct rt_dom *sdom = NULL; > - struct rt_private *prv = rt_priv(ops); > - cpumask_t *online; > spinlock_t *lock = vcpu_schedule_lock_irq(vc); > > clear_bit(__RTDS_scheduled, &svc->flags); > @@ -1084,15 +1214,11 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > likely(vcpu_runnable(vc)) ) > { > __runq_insert(ops, svc); > - __repl_update(ops, NOW()); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ > - > - runq_tickle(ops, snext); > + runq_tickle(ops, svc); > } > + else > + __replq_remove(ops, svc); > + > out: > vcpu_schedule_unlock_irq(lock, vc); > } > @@ -1150,6 +1276,71 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment. > + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct rt_vcpu *svc = NULL; > + > + spin_lock_irqsave(&prv->lock, flags); > + > + stop_timer(repl_timer); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = replq_elem(iter); > + > + if ( now < svc->cur_deadline ) > + break; > + > + rt_update_deadline(now, svc); > + > + /* > + * when the replenishment happens > + * svc is either on a pcpu or on > + * runq/depletedq > + */ > + if( __vcpu_on_q(svc) ) > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + > + /* > + * tickle regardless where it's at > + * because a running vcpu could have > + * a later deadline than others after > + * replenishment > + */ > + runq_tickle(ops, svc); Well, I'm thinking about the correctness of tickle here. The svc is running on a core, say P_i, right now. After replenishing the budget for svc, svc's deadline also changes. So svc should probably be preempted by the head vcpu in the runq. If that's the case, we should use the head of runq to tickle, instead of using svc. Right? Actually, is the runq_tickle necessary here? Can we just check if the updated svc has higher priority (smaller deadline) than the head vcpu in the runq? If so, we just tickle the cpu P_i, where svc is currently running on. (This could save a function call, although at the cost of making the logic longer.) But either way, I'm thinking the logic here is incorrect. If the updated svc has a lower priority, you will end up tickle no core although the svc should be preempted. 
> + > + /* > + * update replenishment event queue > + * without reprogramming the timer > + */ > + deadline_queue_remove(replq, &svc->replq_elem); > + deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq); > + } > + > + /* > + * use the vcpu that's on the top > + * or else don't program the timer

Change the comment to "otherwise, don't reprogram the timer".

> + */ > + if( !list_empty(replq) ) > + set_timer(repl_timer, replq_elem(replq->next)->cur_deadline); > + > + spin_unlock_irqrestore(&prv->lock, flags); > +} > + > static const struct scheduler sched_rtds_def = { > .name = "SMP RTDS Scheduler", > .opt_name = "rtds", > -- > 1.7.9.5

Thanks and Best Regards, Meng
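For concreteness, the parenthesization question above concerns the rt_vcpu_wake() hunk; the two spellings being compared are roughly as follows (a sketch, not the final code; missed is the bool_t local from the hunk):

    /* Form in v7: assignment inside the condition. '>=' binds tighter
     * than '=', so this parses as intended, but the reader has to know
     * the precedence rules. */
    if ( (missed = now >= svc->cur_deadline) )
        rt_update_deadline(now, svc);

    /* Suggested alternative: make the comparison explicit, or compute
     * the flag before the if, so no precedence knowledge is needed. */
    missed = ( now >= svc->cur_deadline );
    if ( missed )
        rt_update_deadline(now, svc);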
On Tue, 2016-03-08 at 23:33 -0500, Meng Xu wrote: > I didn't mark out all repeated style issues. I think you can correct > all of the style issues, such as the spaces in the code, in the next > version. > Yes, please. At v7, style issues shouldn't definitely be there any longer. Have another look at CODING_STYLE, perhaps, especially focusing on spaces in conditional expressions and line length. > Hi Dario, > Could you help check if my two concerns in the comments below make > sense? > One is small, about if we should use () to make the code clearer. > The other is about the logic in the replenishment handler. > I think tickling during replenishment does have issues, yes. See below. Anyway, Meng, can you trim your quotes when replying? By this, I mean cut the part of conversation/patches that are not relevant for what you are saying in this very email (but leave enough context in place for making what you want to point out understandable). That avoids having to scroll pages and pages of quoted text in order to find the actual content. > On Fri, Mar 4, 2016 at 8:39 PM, Tianyang Chen <tiche@seas.upenn.edu> > wrote: > > > > @@ -1033,32 +1159,40 @@ rt_vcpu_wake(const struct scheduler *ops, > > struct vcpu *vc) > > else > > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > > > - /* If context hasn't been saved for this vcpu yet, we can't > > put it on > > - * the Runqueue/DepletedQ. Instead, we set a flag so that it > > will be > > - * put on the Runqueue/DepletedQ after the context has been > > saved. > > + /* > > + * If a deadline passed while svc was asleep/blocked, we need > > new > > + * scheduling parameters (a new deadline and full budget). > > + */ > > + if ( (missed = now >= svc->cur_deadline) ) > Dario, > Is it better to have a () outside of now >= svc->cur_deadline to > make the logic clearer, instead of relying on the operator's > priority? > Yes, go ahead. You may even compute the value of missed outside of the if, and just use it (both here and below). As you wish. > > @@ -1150,6 +1276,71 @@ rt_dom_cntl( > > return rc; > > } > > > > +/* > > + * The replenishment timer handler picks vcpus > > + * from the replq and does the actual replenishment. > > + */ > > +static void repl_handler(void *data){ > > + unsigned long flags; > > + s_time_t now = NOW(); > > + struct scheduler *ops = data; > > + struct rt_private *prv = rt_priv(ops); > > + struct list_head *replq = rt_replq(ops); > > + struct timer *repl_timer = prv->repl_timer; > > + struct list_head *iter, *tmp; > > + struct rt_vcpu *svc = NULL; > > + > > + spin_lock_irqsave(&prv->lock, flags); > > + > > + stop_timer(repl_timer); > > + > > + list_for_each_safe(iter, tmp, replq) > > + { > > + svc = replq_elem(iter); > > + > > + if ( now < svc->cur_deadline ) > > + break; > > + > > + rt_update_deadline(now, svc); > > + > > + /* > > + * when the replenishment happens > > + * svc is either on a pcpu or on > > + * runq/depletedq > > + */ > > + if( __vcpu_on_q(svc) ) > > + { > > + /* put back to runq */ > > + __q_remove(svc); > > + __runq_insert(ops, svc); > > + } > > + > > + /* > > + * tickle regardless where it's at > > + * because a running vcpu could have > > + * a later deadline than others after > > + * replenishment > > + */ > > + runq_tickle(ops, svc); > Well, I'm thinking about the correctness of tickle here. > The svc is running on a core, say P_i, right now. After replenishing > the budget for svc, svc's deadline also changes. So svc should > probably be preempted by the head vcpu in the runq. 
If that's the > case, we should use the head of runq to tickle, instead of using svc. > Right? >

I agree that, in case of replenishment of a running vcpu, calling runq_tickle() like this is not correct. In fact, the whole idea of tickling in Credit1 and 2, from which it has been borrowed here, was meant to (potentially) put a vcpu in execution, not vice versa. :-/ I therefore agree that, if svc ends up with a later deadline than the first vcpu in the runqueue, we should actually call runq_tickle() on the latter!

> Actually, is the runq_tickle necessary here? Can we just check if the > updated svc has higher priority (smaller deadline) than the head vcpu > in the runq? If so, we just tickle the cpu P_i, where svc is > currently > running on. >

Well, if we are replenishing a vcpu that is depleted, its new deadline can be earlier than any of the ones of the vcpus that are running (can't it?), and/or there can be idle vcpus on which you can run it, now that it has budget. So, in this case, I think we need what's done in runq_tickle()...

The third case is that we replenish a vcpu that is in the runqueue, so it had budget, but was not running. In that case, there should be no chance that it should preempt a running vcpu, as it was waiting in the runqueue, which means --if we have M pcpus-- it was not among the M earliest-deadline vcpus, and we just pushed its deadline away! It should also not be necessary to do any deadline comparison with other vcpus in the runqueue. In fact, still considering that we are moving the deadline ahead, it's going to be in either the same or a "worse" position in the runqueue.

> But either way, I'm thinking the logic here is incorrect. If the > updated svc has a lower priority, you will end up tickle no core > although the svc should be preempted. >

It seems to me that the logic here could be (sort of):

    for_each_to_be_replenished_vcpu(v)
    {
        deadline_queue_remove(replq, v);
        rt_update_deadline(v);

        if ( curr_on_cpu(v->processor) == v ) //running
        {
            if ( v->cur_deadline >= runq[0]->cur_deadline )
                tickle(runq[0]); /* runq[0] => first vcpu in the runq */
        }
        else if ( __vcpu_on_q(v) )
        {
            if ( v->cur_budget == 0 ) //depleted
                tickle(v);
            else //runnable
                /* do nothing */
        }
        deadline_queue_insert(v, replq);
    }

Thoughts?

I actually think there may be room for a nasty race, in case we do more than one replenishment. In fact, assume that, at time t:
- we do the replenishment of v1, which is running, and v2, which is runnable,
- we have 1 cpu,
- v1 and v2 have, currently, the same deadline == t,
- v1's new deadline is going to be t+100, and v2's new deadline is going to be t+150,
- v2 is the only (i.e., also the first) runnable vcpu,
- v1's replenishment event comes first in the replenishment queue.

With the code above, we update v1's deadline (to t+100), go check it against v2's _not_updated_ deadline (t) and (since t+100 > t) find that a preemption is necessary, so we call tickle(v2). That will raise SCHEDULE_SOFTIRQ for the cpu, because v2 should --as far as the situation is right now-- preempt v1. However, right after that, we update v2's deadline to t+150, and we do nothing. So, even if the vcpu with the earliest deadline (v1) is running, we reschedule.

This should be harmless, as we do figure out during rt_schedule() that no real context switch is necessary, but I think it would be better to avoid it, if possible.

It looks to me that we can re-arrange the code above as follows:

    for_each_to_be_replenished_vcpu(v)
    {
        rt_update_deadline(v);
    }

    for_each_to_be_replenished_vcpu(v)
    {
        deadline_queue_remove(replq, v);

        if ( curr_on_cpu(v->processor) == v ) //running
        {
            if ( v->cur_deadline >= runq[0]->cur_deadline )
                tickle(runq[0]); /* runq[0] => first vcpu in the runq */
        }
        else if ( __vcpu_on_q(v) )
        {
            if ( v->cur_budget == 0 ) //depleted
                tickle(v);
            else //runnable
                /* do nothing */
        }
        deadline_queue_insert(v, replq);
    }

Basically, by doing all the replenishments (which includes updating all the deadlines) upfront, we should be able to prevent the above situation.

So, again, thoughts?

Regards,
Dario
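As an aside, runq[0] in the pseudocode above is shorthand: in sched_rt.c the run queue is a linked list sorted by deadline, so "the first vcpu in the runq" would be fetched along these lines (runq_first is a hypothetical helper, not part of the patch):

    /* Hypothetical helper: earliest-deadline vcpu in the runq,
     * or NULL if the runq is empty. */
    static struct rt_vcpu *
    runq_first(const struct scheduler *ops)
    {
        struct list_head *runq = rt_runq(ops);

        return list_empty(runq) ? NULL : __q_elem(runq->next);
    }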
On Wed, Mar 9, 2016 at 10:46 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Tue, 2016-03-08 at 23:33 -0500, Meng Xu wrote: >> I didn't mark out all repeated style issues. I think you can correct >> all of the style issues, such as the spaces in the code, in the next >> version. >> > Yes, please. At v7, style issues shouldn't definitely be there any > longer. > > Have another look at CODING_STYLE, perhaps, especially focusing on > spaces in conditional expressions and line length. Tianyang, please correct those coding styles in next version. Now it's time to pay more attention to coding style... > >> Hi Dario, >> Could you help check if my two concerns in the comments below make >> sense? >> One is small, about if we should use () to make the code clearer. >> The other is about the logic in the replenishment handler. >> > I think tickling during replenishment does have issues, yes. See below. > > Anyway, Meng, can you trim your quotes when replying? By this, I mean > cut the part of conversation/patches that are not relevant for what you > are saying in this very email (but leave enough context in place for > making what you want to point out understandable). Sure.. Got it. >> > @@ -1150,6 +1276,71 @@ rt_dom_cntl( >> > return rc; >> > } >> > >> > +/* >> > + * The replenishment timer handler picks vcpus >> > + * from the replq and does the actual replenishment. >> > + */ >> > +static void repl_handler(void *data){ >> > + unsigned long flags; >> > + s_time_t now = NOW(); >> > + struct scheduler *ops = data; >> > + struct rt_private *prv = rt_priv(ops); >> > + struct list_head *replq = rt_replq(ops); >> > + struct timer *repl_timer = prv->repl_timer; >> > + struct list_head *iter, *tmp; >> > + struct rt_vcpu *svc = NULL; >> > + >> > + spin_lock_irqsave(&prv->lock, flags); >> > + >> > + stop_timer(repl_timer); >> > + >> > + list_for_each_safe(iter, tmp, replq) >> > + { >> > + svc = replq_elem(iter); >> > + >> > + if ( now < svc->cur_deadline ) >> > + break; >> > + >> > + rt_update_deadline(now, svc); >> > + >> > + /* >> > + * when the replenishment happens >> > + * svc is either on a pcpu or on >> > + * runq/depletedq >> > + */ >> > + if( __vcpu_on_q(svc) ) >> > + { >> > + /* put back to runq */ >> > + __q_remove(svc); >> > + __runq_insert(ops, svc); >> > + } >> > + >> > + /* >> > + * tickle regardless where it's at >> > + * because a running vcpu could have >> > + * a later deadline than others after >> > + * replenishment >> > + */ >> > + runq_tickle(ops, svc); >> Well, I'm thinking about the correctness of tickle here. >> The svc is running on a core, say P_i, right now. After replenishing >> the budget for svc, svc's deadline also changes. So svc should >> probably be preempted by the head vcpu in the runq. If that's the >> case, we should use the head of runq to tickle, instead of using svc. >> Right? >> > I agree that, in case of replenishment of a running vcpu, calling > runq_tickle() like this is not correct. In fact, the whole idea of > tickling in Credit1 and 2, from which it has been borrowed to here, was > meant at (potentially) putting a vcpu in execution, not vice versa. :-/ > > I therefore agree that, if svc ends up with a later deadline than the > first vcpu in the runque, we should actually call run_tickle() on the > latter! > >> Actually, is the runq_tickle necessary here? Can we just check if the >> updated svc has higher priority (smaller deadline) than the head vcpu >> in the runq? If so, we just tickle the cpu P_i, where svc is >> currently >> running on. 
>> > Well, if we are replenishing a vcpu that is depleted, whose new > deadline can be earlier than any of the ones of the vcpus that are > running (can't it?) and/or there can be idle vcpus on which you can run > it, now that it has budget. So, in this case, I think we need what's > done in runq_tickle()... > > The third case is that we replenish a vcpu that is in the runqueue, so > it had budget, but was not running. In that case, there should be no > chance that it should preempt a running vcpu, as it was waiting in the > runqueue, which means --if we have M pcpus-- it was not among the M > earliest deadline vcpus, and we just pushed its deadline away! > It should also not be necessary to do any deadline comparison with > other vcpus in the runqueue. In fact, still considering that we are > moving the deadline ahead, it's going to either be in the same or > "worst" position in the runqueue. > >> But either way, I'm thinking the logic here is incorrect. If the >> updated svc has a lower priority, you will end up tickle no core >> although the svc should be preempted. >> > It seems to me that the logic here could be (sort of):
>
>     for_each_to_be_replenished_vcpu(v)
>     {
>         deadline_queue_remove(replq, v);
>         rt_update_deadline(v);
>
>         if ( curr_on_cpu(v->processor) == v ) //running
>         {
>             if ( v->cur_deadline >= runq[0]->cur_deadline )
>                 tickle(runq[0]); /* runq[0] => first vcpu in the runq */
>         }
>         else if ( __vcpu_on_q(v) )
>         {
>             if ( v->cur_budget == 0 ) //depleted
>                 tickle(v);
>             else //runnable
>                 /* do nothing */
>         }
>         deadline_queue_insert(v, replq);
>     }
>
> Thoughts?

I like the logic here. But there is one thing to note/change: after we run rt_update_deadline(v), v->cur_budget will be set to v->budget, which means that we cannot use v->cur_budget to determine whether v was on the runq or the depletedq. I suggest setting a flag that records whether v is in the depletedq (i.e. v->cur_budget == 0) before we run rt_update_deadline(v), and using that flag later.

> > I actually think there may be room for a nasty race, in case we do more > than one replenishment. > > In fact, assume that, at time t:
> - we do the replenishment of v1, which is running, and v2, which is runnable,
> - we have 1 cpu,
> - v1 and v2 have, currently, the same deadline == t,
> - v1's new deadline is going to be t+100, and v2's new deadline is going to be t+150,
> - v2 is the only (i.e., also the first) runnable vcpu,
> - v1's replenishment event comes first in the replenishment queue.
>
> With the code above, we update v1's deadline (to t+100), go check it > against v2's _not_updated_ deadline (t) and (since t+100 > t) find that > a preemption is necessary, so we call tickle(v2). That will raise > SCHEDULE_SOFTIRQ for the cpu, because v2 should --as far as the situation > is right now-- preempt v1. > > However, right after that, we update v2's deadline to t+150, and we do > nothing. So, even if the vcpu with the earliest deadline (v1) is > running, we reschedule.

Hmm, I kind of get what you want to deliver with the example here. I agree. I will just summarize what I understand here: when we replenish the vcpus one by one and tickle them one by one, we may end up using a to-be-updated vcpu to tickle, which will probably be "kicked out" by the next to-be-updated vcpu. rt_schedule() will probably do a no-op for the to-be-updated vcpu, since it can tell that this vcpu has actually missed its deadline (the vcpu is about to have its deadline updated, which means it has just reached/passed its current deadline).

> > This should be harmless, as we do figure out during rt_schedule() that > no real context switch is necessary, but I think it would be better to avoid it, if possible.

Right.

> > It looks to me that we can re-arrange the code above as follows:
>
>     for_each_to_be_replenished_vcpu(v)
>     {
>         rt_update_deadline(v);
>     }

We have to record the number of vcpus that have updated their deadlines, so that we know when to stop in the next loop.

>     for_each_to_be_replenished_vcpu(v)
>     {
>         deadline_queue_remove(replq, v);
>
>         if ( curr_on_cpu(v->processor) == v ) //running
>         {
>             if ( v->cur_deadline >= runq[0]->cur_deadline )
>                 tickle(runq[0]); /* runq[0] => first vcpu in the runq */
>         }
>         else if ( __vcpu_on_q(v) )
>         {
>             if ( v->cur_budget == 0 ) //depleted
>                 tickle(v);
>             else //runnable
>                 /* do nothing */
>         }
>         deadline_queue_insert(v, replq);
>     }
>
> Basically, by doing all the replenishments (which includes updating all > the deadlines) upfront, we should be able to prevent the above > situation. > > So, again, thoughts?

I think we need to decide which vcpus are on the depleted queue before updating the deadlines, and we also need to record which vcpus have been updated. So I added some code into your code:

    #define __RTDS_is_depleted 3
    #define RTDS_is_depleted (1<<__RTDS_is_depleted)

    int num_repl_vcpus = 0;

    for_each_to_be_replenished_vcpu(v)
    {
        if ( v->cur_budget <= 0 )
            set_bit(__RTDS_is_depleted, &v->flags);

        rt_update_deadline(v);
        num_repl_vcpus++;
    }

    for_each_to_be_replenished_vcpu(v)
    {
        deadline_queue_remove(replq, v);

        if ( curr_on_cpu(v->processor) == v ) //running
        {
            if ( v->cur_deadline >= runq[0]->cur_deadline )
                tickle(runq[0]); /* runq[0] => first vcpu in the runq */
        }
        else if ( __vcpu_on_q(v) )
        {
            if ( v->flags & RTDS_is_depleted ) //depleted
            {
                clear_bit(__RTDS_is_depleted, &v->flags);
                tickle(v);
            }
            else //runnable
                /* do nothing */
        }
        deadline_queue_insert(v, replq);

        /* stop at the vcpu that does not need replenishment */
        num_repl_vcpus--;
        if ( !num_repl_vcpus )
            break;
    }

What do you think of this version?

Thanks and Best Regards, Meng
On Wed, 2016-03-09 at 23:00 -0500, Meng Xu wrote: > On Wed, Mar 9, 2016 at 10:46 AM, Dario Faggioli > <dario.faggioli@citrix.com> wrote: > > > > Basically, by doing all the replenishments (which includes updating > > all > > the deadlines) upfront, we should be able to prevent the above > > situation. > > > > So, again, thoughts? > I think we need to decide which vcpu is on depleted q before update > deadline and we also need to record which vcpus should be updated. > I think you are right about the need of a depleted flag, or something that will have the same effect (but I really do like using a flag for that! :-D ). I don't think we really need to count anything. In fact, what I had in mind and tried to put down in pseudocode is that we traverse the list of replenishment events twice. During the first traversal, we do not remove the elements that we replenish (i.e., the ones that we call rt_update_deadline() on). Therefore, we can just do the second traversal, find them all in there, handle the tickling, and --in this case-- remove and re-insert them. Wouldn't this work? > So > I added some code into your code: > > #define __RTDS_is_depleted 3 > #define RTDS_is_depleted (1<<__RTDS_is_depleted) > As said, I like this. However... > int num_repl_vcpus = 0; > for_each_to_be_replenished_vcpu(v) > { > if (v->cur_budget <= 0) > set_bit(__RTDS_is_depleted, &v->flags); > ... I think we can do this in burn_budget(), where we have this check in place already. > rt_update_deadline(v); > num_repl_vcpus++; > } > > for_each_to_be_replenished_vcpu(v) > { > deadline_queue_remove(replq, v); > > if ( curr_on_cpu(v->processor) == v)) //running > { > if ( v->cur_deadline >= runq[0]->cur_deadline ) > tickle(runq[0]); /* runq[0] => first vcpu in the runq */ > } > else if ( __vcpu_on_q(v) ) > { > if ( v->flags & RTDS_is_depleted ) //depleted > { > clear_bit(__RTDS_is_depleted, &v->flags); > if ( test_and_clear(xxx) ) Or __test_and_clear(xxx). > tickle(v); > } > else //runnable > /* do nothing */ > } > deadline_queue_insert(v, replq); > > /* stop at the vcpu that does not need replenishment */ > num_repl_vcpus--; > if (!num_repl_vcpus) > break; > If we really need to record/mark this, I want to think at how that would be best done, as I'm not a fan of this counting... But hopefully, we just don't need to do anything like that, do we? Thanks and Regards, Dario
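Concretely, setting the flag where the depletion is already detected would look something like this (a sketch of the tail of burn_budget(), with the __RTDS_is_depleted bit from Meng's draft above; delta stands for the budget consumed since last_start, computed earlier in the function):

    svc->cur_budget -= delta;

    if ( svc->cur_budget <= 0 )
    {
        svc->cur_budget = 0;
        /*
         * Remember the depletion: after rt_update_deadline() refills
         * cur_budget, this bit is the only way the replenishment
         * handler can tell the vcpu was sitting on the depletedq.
         */
        set_bit(__RTDS_is_depleted, &svc->flags);
    }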
On Thu, Mar 10, 2016 at 5:38 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Wed, 2016-03-09 at 23:00 -0500, Meng Xu wrote: >> On Wed, Mar 9, 2016 at 10:46 AM, Dario Faggioli >> <dario.faggioli@citrix.com> wrote: >> > >> > Basically, by doing all the replenishments (which includes updating >> > all >> > the deadlines) upfront, we should be able to prevent the above >> > situation. >> > >> > So, again, thoughts? >> I think we need to decide which vcpu is on depleted q before update >> deadline and we also need to record which vcpus should be updated. >> > I think you are right about the need of a depleted flag, or something > that will have the same effect (but I really do like using a flag for > that! :-D ). > > I don't think we really need to count anything. In fact, what I had in > mind and tried to put down in pseudocode is that we traverse the list > of replenishment events twice. During the first traversal, we do not > remove the elements that we replenish (i.e., the ones that we call > rt_update_deadline() on). Therefore, we can just do the second > traversal, find them all in there, handle the tickling, and --in this > case-- remove and re-insert them. Wouldn't this work? My concern is that: Once we run rt_update_deadline() in the first traversal of the list, we have updated the cur_deadline and cur_budget already. Since the replenish queue is sorted by the cur_deadline, how can we know which vcpu has been updated in the first traversal and need to be reinsert? We don't have to traverse the whole replq to reinsert all vcpus since some of them haven't been replenished yet. If we directly remove and reinsert, we have to change the replq_reinsert() to provide a return value if the location of the to-be-inserted vcpu changes, which probably complicates the logic of the replq_reinsert(), IMO. If we wan to avoid the counting, we can add a flag like #define __RTDS_delayed_reinsert_replq 4 #define RTDS_delayed_reinsert_replq (1<< __RTDS_delayed_reinsert_replq) so that we know when we should stop at the second traversal. > >> So >> I added some code into your code: >> >> #define __RTDS_is_depleted 3 >> #define RTDS_is_depleted (1<<__RTDS_is_depleted) >> > As said, I like this. However... > >> int num_repl_vcpus = 0; >> for_each_to_be_replenished_vcpu(v) >> { >> if (v->cur_budget <= 0) >> set_bit(__RTDS_is_depleted, &v->flags); >> > ... I think we can do this in burn_budget(), where we have this check > in place already. Agree. :-) > >> rt_update_deadline(v); >> num_repl_vcpus++; >> } >> >> for_each_to_be_replenished_vcpu(v) >> { >> deadline_queue_remove(replq, v); >> >> if ( curr_on_cpu(v->processor) == v)) //running >> { >> if ( v->cur_deadline >= runq[0]->cur_deadline ) >> tickle(runq[0]); /* runq[0] => first vcpu in the runq */ >> } >> else if ( __vcpu_on_q(v) ) >> { >> if ( v->flags & RTDS_is_depleted ) //depleted >> { >> clear_bit(__RTDS_is_depleted, &v->flags); >> > if ( test_and_clear(xxx) ) > > Or __test_and_clear(xxx). > Probably this one: test_and_clear_bit() which is used in test and clear the __RTDS_delayed_runq_add bit >> tickle(v); >> } >> else //runnable >> /* do nothing */ >> } >> deadline_queue_insert(v, replq); >> >> /* stop at the vcpu that does not need replenishment */ >> num_repl_vcpus--; >> if (!num_repl_vcpus) >> break; >> > If we really need to record/mark this, I want to think at how that > would be best done, as I'm not a fan of this counting... But hopefully, > we just don't need to do anything like that, do we? 
I think we need to know when to stop at the second traversal, unless I'm wrong in the above. :-) I would prefer using the flag, because it looks much clearer than counting. Thanks and Best Regards, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
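With test_and_clear_bit(), the depleted branch of the second loop would collapse to something like this (a sketch; the surrounding loop is the one from the drafts above):

    else if ( __vcpu_on_q(svc) )
    {
        /*
         * Tickle only if this vcpu really was depleted before the
         * replenishment; the bit is tested and cleared in one atomic step.
         */
        if ( test_and_clear_bit(__RTDS_is_depleted, &svc->flags) )
            runq_tickle(ops, svc);
    }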
On Thu, 2016-03-10 at 10:28 -0500, Meng Xu wrote: > On Thu, Mar 10, 2016 at 5:38 AM, Dario Faggioli > <dario.faggioli@citrix.com> wrote: > > > > I don't think we really need to count anything. In fact, what I had > > in > > mind and tried to put down in pseudocode is that we traverse the > > list > > of replenishment events twice. During the first traversal, we do > > not > > remove the elements that we replenish (i.e., the ones that we call > > rt_update_deadline() on). Therefore, we can just do the second > > traversal, find them all in there, handle the tickling, and --in > > this > > case-- remove and re-insert them. Wouldn't this work? > My concern is that: > Once we run rt_update_deadline() in the first traversal of the list, > we have updated the cur_deadline and cur_budget already. > Since the replenish queue is sorted by the cur_deadline, how can we > know which vcpu has been updated in the first traversal and need to > be > reinsert? We don't have to traverse the whole replq to reinsert all > vcpus since some of them haven't been replenished yet. >

Ah, you're right: by doing all the rt_update_deadline() calls in the first loop, we screw the stop condition of the second loop.

I still don't like counting, it looks fragile. :-/

This that you propose here...

> If we wan to avoid the counting, we can add a flag like > #define __RTDS_delayed_reinsert_replq 4 > #define RTDS_delayed_reinsert_replq (1<< > __RTDS_delayed_reinsert_replq) > so that we know when we should stop at the second traversal. >

...seems like it could work, but I am also not super happy about it, as it does not look to me that there should be the need of such a generic piece of information as a flag for this very specific purpose. I mean, I know we have plenty of free bits in flags, but this is something that happens *all* *inside* one function (the replenishment timer handler).

What about an internal (to the replenishment timer function), temporary list? Something along the lines of:

    ...
    LIST_HEAD(tmp_replq);

    list_for_each_safe(iter, tmp, replq)
    {
        svc = replq_elem(iter);

        if ( now < svc->cur_deadline )
            break;

        list_del(&svc->replq_elem);
        rt_update_deadline(now, svc);
        list_add(&svc->replq_elem, &tmp_replq);
    }

    list_for_each_safe(iter, tmp, &tmp_replq)
    {
        svc = replq_elem(iter);

        < tickling logic >

        list_del(&svc->replq_elem);
        deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq);
    }
    ...

So, basically, the idea is:
- first, we fetch all the vcpus that need a replenishment, remove them from the replenishment queue, do the replenishment, and stash them in a temp list;
- second, for all the vcpus that we replenished (and we know which ones they are: all the ones in the temp list!), we apply the proper tickling logic, remove them from the temp list, and queue their new replenishment event.

It may look a bit convoluted, all this list moving, but I do like the fact that it is super self-contained.

How does that sound / What did I forget this time? :-)

BTW, I hope I got the code snippet right, but please, let's focus on and discuss the idea.

Regards,
Dario
On Thu, Mar 10, 2016 at 11:43 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Thu, 2016-03-10 at 10:28 -0500, Meng Xu wrote: >> On Thu, Mar 10, 2016 at 5:38 AM, Dario Faggioli >> <dario.faggioli@citrix.com> wrote: >> > >> > I don't think we really need to count anything. In fact, what I had >> > in >> > mind and tried to put down in pseudocode is that we traverse the >> > list >> > of replenishment events twice. During the first traversal, we do >> > not >> > remove the elements that we replenish (i.e., the ones that we call >> > rt_update_deadline() on). Therefore, we can just do the second >> > traversal, find them all in there, handle the tickling, and --in >> > this >> > case-- remove and re-insert them. Wouldn't this work? >> My concern is that: >> Once we run rt_update_deadline() in the first traversal of the list, >> we have updated the cur_deadline and cur_budget already. >> Since the replenish queue is sorted by the cur_deadline, how can we >> know which vcpu has been updated in the first traversal and need to >> be >> reinsert? We don't have to traverse the whole replq to reinsert all >> vcpus since some of them haven't been replenished yet. >> > Ah, you're right, doing all the rt_update_deadline() in the first loop, > we screw the stop condition of the second loop. > > I still don't like counting, it looks fragile. :-/ > > This that you propose here... >> If we wan to avoid the counting, we can add a flag like >> #define __RTDS_delayed_reinsert_replq 4 >> #define RTDS_delayed_reinsert_replq (1<< >> __RTDS_delayed_reinsert_replq) >> so that we know when we should stop at the second traversal. >> > ...seems like it could work, but I also am not super happy about it, as > it does not look to me there should be the need of such a generic piece > of information such as a flag, for this very specific purpose. > > I mean, I know we have plenty of free bits in flag, but it's something > that happens *all* *inside* one function (replenishment timer handler). OK. Agree. The internal list idea flashed in my mind before and I didn't catch it. ;-) > > What about an internal (to the timer replenishment fucntion), > temporary, list. Something along the lines of: I think the pseudo-code makes sense. I just need to add some more logic into it to make it complete. It forgets to handle the runq. > > ... > LIST_HEAD(tmp_replq); > > list_for_each_safe(iter, tmp, replq) > { > svc = replq_elem(iter); > > if ( now < svc->cur_deadline ) > break; > > list_del(&svc->replq_elem); > rt_update_deadline(now, svc); > list_add(&svc->replq_elem, &tmp_replq); /* if svc is on runq, we need to put it to the correct place since its deadline changes. */ if( __vcpu_on_q(svc) ) { /* put back to runq */ __q_remove(svc); __runq_insert(ops, svc); > } > > list_for_each_safe(iter, tmp, tmp_replq) > { > svc = replq_elem(iter); > > < tickling logic > > > list_del(&svc->replq_elem); > deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq); > } > ... > > So, basically, the idea is: > - first, we fetch all the vcpus that needs a replenishment, remove > them from replenishment queue, do the replenishment and stash them > in a temp list; > - second, for all the vcpus that we replenished (which we know which > ones they are: all the ones in the temp list!) we apply the proper > tickling logic, remove them from the temp list and queue their new > replenishment event. > > It may look a bit convoluted, all these list moving, but I do like the > fact that is is super self-contained. 
> > How does that sound / What did I forget this time ? :-) Besides we need to "resort" the runq if the to-be-replenished vcpu is on the runq now. Besides that, I think it's good. > > BTW, I hope I got the code snippet right, but please, let's focus and > discuss the idea. Right. :-) Thanks and Best Regards, Meng
On Thu, 2016-03-10 at 13:08 -0500, Meng Xu wrote: > On Thu, Mar 10, 2016 at 11:43 AM, Dario Faggioli > <dario.faggioli@citrix.com> wrote: > > > I think the pseudo-code makes sense. I just need to add some more > logic into it to make it complete. It forgets to handle the runq. > You mean... > > > > > > ... > > LIST_HEAD(tmp_replq); > > > > list_for_each_safe(iter, tmp, replq) > > { > > svc = replq_elem(iter); > > > > if ( now < svc->cur_deadline ) > > break; > > > > list_del(&svc->replq_elem); > > rt_update_deadline(now, svc); > > list_add(&svc->replq_elem, &tmp_replq); > /* if svc is on runq, we need to put it to the correct place > since its deadline changes. */ > if( __vcpu_on_q(svc) ) > { > /* put back to runq */ > __q_remove(svc); > __runq_insert(ops, svc); > ...This? Well, sure, this was being done in repl_handler() in Tianyang's v7 already, and I never meant to say we shouldn't do that any longer. I actually did not include it in here, because it was obvious to me that it still was necessary, and it was probably ok to put it in either of the loops (while I wanted to highlight what we need to do in the first and what in the second). > > It may look a bit convoluted, all these list moving, but I do like > > the > > fact that is is super self-contained. > > > > How does that sound / What did I forget this time ? :-) > Besides we need to "resort" the runq if the to-be-replenished vcpu is > on the runq now. Besides that, I think it's good. > If by re-sorting you mean what you added above, sure thing. Regards, Dario
[My bad, Dario, I somehow only sent to you my reply... I'm resending to everyone.] On Thu, Mar 10, 2016 at 6:53 PM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Thu, 2016-03-10 at 13:08 -0500, Meng Xu wrote: >> On Thu, Mar 10, 2016 at 11:43 AM, Dario Faggioli >> <dario.faggioli@citrix.com> wrote: >> > >> I think the pseudo-code makes sense. I just need to add some more >> logic into it to make it complete. It forgets to handle the runq. >> > You mean... >> > >> > >> > ... >> > LIST_HEAD(tmp_replq); >> > >> > list_for_each_safe(iter, tmp, replq) >> > { >> > svc = replq_elem(iter); >> > >> > if ( now < svc->cur_deadline ) >> > break; >> > >> > list_del(&svc->replq_elem); >> > rt_update_deadline(now, svc); >> > list_add(&svc->replq_elem, &tmp_replq); >> /* if svc is on runq, we need to put it to the correct place >> since its deadline changes. */ >> if( __vcpu_on_q(svc) ) >> { >> /* put back to runq */ >> __q_remove(svc); >> __runq_insert(ops, svc); >> > ...This? Yes. > > Well, sure, this was being done in repl_handler() in Tianyang's v7 > already, and I never meant to say we shouldn't do that any longer. > > I actually did not include it in here, because it was obvious to me > that it still was necessary, and it was probably ok to put it in either > of the loops (while I wanted to highlight what we need to do in the > first and what in the second). > >> > It may look a bit convoluted, all these list moving, but I do like >> > the >> > fact that is is super self-contained. >> > >> > How does that sound / What did I forget this time ? :-) >> Besides we need to "resort" the runq if the to-be-replenished vcpu is >> on the runq now. Besides that, I think it's good. >> > If by re-sorting you mean what you added above, sure thing. > Yes. Since we have reached to some agreement on this, we can work on the v8 now. :-D Thanks and Best Regards, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
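To summarize where the thread lands: combining the two-phase temp-list structure, the runq re-sorting, and the depleted flag gives a handler of roughly the following shape. This is only a sketch of the agreed direction, not the committed v8: __RTDS_is_depleted is the flag name from Meng's draft (assumed to be set in burn_budget() when the budget hits zero), and the other helpers are the ones from the v7 patch above.

    static void repl_handler(void *data)
    {
        s_time_t now = NOW();
        struct scheduler *ops = data;
        struct rt_private *prv = rt_priv(ops);
        struct list_head *replq = rt_replq(ops);
        struct list_head *runq = rt_runq(ops);
        struct timer *repl_timer = prv->repl_timer;
        struct list_head *iter, *tmp;
        struct rt_vcpu *svc;
        unsigned long flags;
        LIST_HEAD(tmp_replq);

        spin_lock_irqsave(&prv->lock, flags);

        stop_timer(repl_timer);

        /*
         * First pass: do all the replenishments up front, moving each
         * replenished vcpu onto a local list, so that the second pass
         * knows exactly which vcpus were updated and every deadline
         * comparison below is between up-to-date values.
         */
        list_for_each_safe ( iter, tmp, replq )
        {
            svc = replq_elem(iter);

            if ( now < svc->cur_deadline )
                break;

            list_del(&svc->replq_elem);
            rt_update_deadline(now, svc);
            list_add(&svc->replq_elem, &tmp_replq);
        }

        /*
         * Second pass: re-sort the runq where needed, apply the
         * tickling logic, and re-queue the replenishment events.
         */
        list_for_each_safe ( iter, tmp, &tmp_replq )
        {
            svc = replq_elem(iter);

            if ( curr_on_cpu(svc->vcpu->processor) == svc->vcpu &&
                 !list_empty(runq) )
            {
                /*
                 * svc is running and its deadline just moved ahead, so
                 * it may now need to be preempted by the first (i.e.
                 * earliest-deadline) vcpu in the runq.
                 */
                struct rt_vcpu *next_on_runq = __q_elem(runq->next);

                if ( svc->cur_deadline >= next_on_runq->cur_deadline )
                    runq_tickle(ops, next_on_runq);
            }
            else if ( __vcpu_on_q(svc) )
            {
                /* Re-sort svc in the runq, under its new deadline. */
                __q_remove(svc);
                __runq_insert(ops, svc);

                /* Only a formerly depleted vcpu can need tickling here. */
                if ( test_and_clear_bit(__RTDS_is_depleted, &svc->flags) )
                    runq_tickle(ops, svc);
            }

            list_del(&svc->replq_elem);
            deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq);
        }

        /* Re-arm the timer for the next replenishment event, if any. */
        if ( !list_empty(replq) )
            set_timer(repl_timer, replq_elem(replq->next)->cur_deadline);

        spin_unlock_irqrestore(&prv->lock, flags);
    }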
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c index bfed2e2..06803be 100644 --- a/xen/common/sched_rt.c +++ b/xen/common/sched_rt.c @@ -16,6 +16,7 @@ #include <xen/delay.h> #include <xen/event.h> #include <xen/time.h> +#include <xen/timer.h> #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> @@ -87,7 +88,7 @@ #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) #define UPDATE_LIMIT_SHIFT 10 -#define MAX_SCHEDULE (MILLISECS(1)) + /* * Flags */ @@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch; */ static unsigned int nr_rt_ops; +/* handler for the replenishment timer */ +static void repl_handler(void *data); + /* * Systme-wide private data, include global RunQueue/DepletedQ * Global lock is referenced by schedule_data.schedule_lock from all @@ -152,7 +156,9 @@ struct rt_private { struct list_head sdom; /* list of availalbe domains, used for dump */ struct list_head runq; /* ordered list of runnable vcpus */ struct list_head depletedq; /* unordered list of depleted vcpus */ + struct list_head replq; /* ordered list of vcpus that need replenishment */ cpumask_t tickled; /* cpus been tickled */ + struct timer *repl_timer; /* replenishment timer */ }; /* @@ -160,6 +166,7 @@ struct rt_private { */ struct rt_vcpu { struct list_head q_elem; /* on the runq/depletedq list */ + struct list_head replq_elem;/* on the repl event list */ /* Up-pointers */ struct rt_dom *sdom; @@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) return &rt_priv(ops)->depletedq; } +static inline struct list_head *rt_replq(const struct scheduler *ops) +{ + return &rt_priv(ops)->replq; +} + /* - * Queue helper functions for runq and depletedq + * Helper functions for manipulating the runqueue, the depleted queue, + * and the replenishment events queue. */ static int __vcpu_on_q(const struct rt_vcpu *svc) @@ -228,6 +241,18 @@ __q_elem(struct list_head *elem) return list_entry(elem, struct rt_vcpu, q_elem); } +static struct rt_vcpu * +replq_elem(struct list_head *elem) +{ + return list_entry(elem, struct rt_vcpu, replq_elem); +} + +static int +vcpu_on_replq(const struct rt_vcpu *svc) +{ + return !list_empty(&svc->replq_elem); +} + /* * Debug related code, dump vcpu/cpu information */ @@ -288,7 +313,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) static void rt_dump(const struct scheduler *ops) { - struct list_head *runq, *depletedq, *iter; + struct list_head *runq, *depletedq, *replq, *iter; struct rt_private *prv = rt_priv(ops); struct rt_vcpu *svc; struct rt_dom *sdom; @@ -301,6 +326,7 @@ rt_dump(const struct scheduler *ops) runq = rt_runq(ops); depletedq = rt_depletedq(ops); + replq = rt_replq(ops); printk("Global RunQueue info:\n"); list_for_each( iter, runq ) @@ -316,6 +342,13 @@ rt_dump(const struct scheduler *ops) rt_dump_vcpu(ops, svc); } + printk("Global Replenishment Event info:\n"); + list_for_each( iter, replq ) + { + svc = replq_elem(iter); + rt_dump_vcpu(ops, svc); + } + printk("Domain info:\n"); list_for_each( iter, &prv->sdom ) { @@ -380,11 +413,76 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc) return; } +/* + * A helper function that only removes a vcpu from a queue + * and it returns 1 if the vcpu was in front of the list. 
+ */
+static inline int
+deadline_queue_remove(struct list_head *queue, struct list_head *elem)
+{
+    int pos = 0;
+    if( queue->next != elem )
+        pos = 1;
+    list_del_init(elem);
+    return !pos;
+}
+
 static inline void
 __q_remove(struct rt_vcpu *svc)
 {
-    if ( __vcpu_on_q(svc) )
-        list_del_init(&svc->q_elem);
+    ASSERT( __vcpu_on_q(svc) );
+    list_del_init(&svc->q_elem);
+}
+
+/*
+ * Removing a vcpu from the replenishment queue could
+ * re-program the timer for the next replenishment event
+ * if it was at the front of the list.
+ */
+static inline void
+__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer* repl_timer = prv->repl_timer;
+
+    ASSERT( vcpu_on_replq(svc) );
+
+    if( deadline_queue_remove(replq,&svc->replq_elem) )
+    {
+        /* re-arm the timer for the next replenishment event */
+        if( !list_empty(replq) )
+        {
+            struct rt_vcpu *svc_next = replq_elem(replq->next);
+            set_timer(repl_timer, svc_next->cur_deadline);
+        }
+        else
+            stop_timer(repl_timer);
+    }
+}
+
+/*
+ * An utility function that inserts a vcpu to a
+ * queue based on certain order (EDF). The returned
+ * value is 1 if a vcpu has been inserted to the
+ * front of a list.
+ */
+static int
+deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
+                      struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
+{
+    struct list_head *iter;
+    int pos = 0;
+
+    list_for_each(iter, queue)
+    {
+        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
+        if ( svc->cur_deadline <= iter_svc->cur_deadline )
+            break;
+        pos++;
+    }
+
+    list_add_tail(elem, iter);
+    return !pos;
 }
 
 /*
@@ -397,27 +495,62 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
 {
     struct rt_private *prv = rt_priv(ops);
     struct list_head *runq = rt_runq(ops);
-    struct list_head *iter;
 
     ASSERT( spin_is_locked(&prv->lock) );
-    ASSERT( !__vcpu_on_q(svc) );
+    ASSERT( vcpu_on_replq(svc) );
 
     /* add svc to runq if svc still has budget */
     if ( svc->cur_budget > 0 )
-    {
-        list_for_each(iter, runq)
-        {
-            struct rt_vcpu * iter_svc = __q_elem(iter);
-            if ( svc->cur_deadline <= iter_svc->cur_deadline )
-                break;
-        }
-        list_add_tail(&svc->q_elem, iter);
-    }
+        deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
     else
-    {
         list_add(&svc->q_elem, &prv->depletedq);
+}
+
+/*
+ * Insert svc into the replenishment event list
+ * in replenishment time order.
+ * vcpus that need to be replished earlier go first.
+ * The timer may be re-programmed if svc is inserted
+ * at the front of the event list.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct list_head *replq = rt_replq(ops);
+    struct rt_private *prv = rt_priv(ops);
+    struct timer *repl_timer = prv->repl_timer;
+
+    ASSERT( !vcpu_on_replq(svc) );
+
+    if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
+        set_timer(repl_timer,svc->cur_deadline);
+}
+
+/*
+ * Removes and re-inserts an event to the replenishment queue.
+ */
+static void
+replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct list_head *replq = rt_replq(ops);
+    struct rt_private *prv = rt_priv(ops);
+    struct timer *repl_timer = prv->repl_timer;
+
+    ASSERT( vcpu_on_replq(svc) );
+
+    if( deadline_queue_remove(replq,&svc->replq_elem) )
+    {
+        if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
+            set_timer(repl_timer,svc->cur_deadline);
+        else
+        {
+            struct rt_vcpu *svc_next = replq_elem(replq->next);
+            set_timer(repl_timer, svc_next->cur_deadline);
+        }
     }
+    else if( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
+        set_timer(repl_timer,svc->cur_deadline);
 }
 
 /*
@@ -449,11 +582,18 @@ rt_init(struct scheduler *ops)
     INIT_LIST_HEAD(&prv->sdom);
     INIT_LIST_HEAD(&prv->runq);
     INIT_LIST_HEAD(&prv->depletedq);
+    INIT_LIST_HEAD(&prv->replq);
 
     cpumask_clear(&prv->tickled);
 
     ops->sched_data = prv;
 
+    /*
+     * The timer initialization will happen later when
+     * the first pcpu is added to this pool in alloc_pdata.
+     */
+    prv->repl_timer = NULL;
+
     return 0;
 
  no_mem:
@@ -473,6 +613,10 @@ rt_deinit(struct scheduler *ops)
         xfree(_cpumask_scratch);
         _cpumask_scratch = NULL;
     }
+
+    kill_timer(prv->repl_timer);
+    xfree(prv->repl_timer);
+
     ops->sched_data = NULL;
     xfree(prv);
 }
@@ -494,6 +638,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
     if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
         return NULL;
 
+    if( prv->repl_timer == NULL )
+    {
+        /* allocate the timer on the first cpu of this pool */
+        prv->repl_timer = xzalloc(struct timer);
+
+        if(prv->repl_timer == NULL )
+            return NULL;
+
+        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
+    }
+
     /* 1 indicates alloc. succeed in schedule.c */
     return (void *)1;
 }
@@ -587,6 +742,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         return NULL;
 
     INIT_LIST_HEAD(&svc->q_elem);
+    INIT_LIST_HEAD(&svc->replq_elem);
     svc->flags = 0U;
     svc->sdom = dd;
     svc->vcpu = vc;
@@ -610,7 +766,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
 }
 
 /*
- * This function is called in sched_move_domain() in schedule.c
+ * It is called in sched_move_domain() and sched_init_vcpu
+ * in schedule.c
  * When move a domain to a new cpupool.
  * It inserts vcpus of moving domain to the scheduler's RunQ in
  * dest. cpupool.
@@ -628,8 +785,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
     if ( now >= svc->cur_deadline )
         rt_update_deadline(now, svc);
 
-    if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running )
-        __runq_insert(ops, svc);
+    if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) )
+    {
+        __replq_insert(ops, svc);
+
+        if( !vc->is_running)
+            __runq_insert(ops, svc);
+    }
     vcpu_schedule_unlock_irq(lock, vc);
 
     SCHED_STAT_CRANK(vcpu_insert);
@@ -652,6 +814,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
     lock = vcpu_schedule_lock_irq(vc);
     if ( __vcpu_on_q(svc) )
         __q_remove(svc);
+
+    if( vcpu_on_replq(svc) )
+        __replq_remove(ops,svc);
+
     vcpu_schedule_unlock_irq(lock, vc);
 }
 
@@ -784,44 +950,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
 }
 
 /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-    struct list_head *runq = rt_runq(ops);
-    struct list_head *depletedq = rt_depletedq(ops);
-    struct list_head *iter;
-    struct list_head *tmp;
-    struct rt_vcpu *svc = NULL;
-
-    list_for_each_safe(iter, tmp, runq)
-    {
-        svc = __q_elem(iter);
-        if ( now < svc->cur_deadline )
-            break;
-
-        rt_update_deadline(now, svc);
-        /* reinsert the vcpu if its deadline is updated */
-        __q_remove(svc);
-        __runq_insert(ops, svc);
-    }
-
-    list_for_each_safe(iter, tmp, depletedq)
-    {
-        svc = __q_elem(iter);
-        if ( now >= svc->cur_deadline )
-        {
-            rt_update_deadline(now, svc);
-            __q_remove(svc); /* remove from depleted queue */
-            __runq_insert(ops, svc); /* add to runq */
-        }
-    }
-}
-
-/*
  * schedule function for rt scheduler.
  * The lock is already grabbed in schedule.c, no need to lock here
  */
@@ -840,8 +968,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
     /* burn_budget would return for IDLE VCPU */
     burn_budget(ops, scurr, now);
 
-    __repl_update(ops, now);
-
     if ( tasklet_work_scheduled )
     {
         trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL);
@@ -868,6 +994,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
         set_bit(__RTDS_delayed_runq_add, &scurr->flags);
 
     snext->last_start = now;
+    ret.time = -1; /* if an idle vcpu is picked */
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         if ( snext != scurr )
@@ -880,9 +1007,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
             snext->vcpu->processor = cpu;
             ret.migrated = 1;
         }
+        ret.time = snext->budget; /* invoke the scheduler next time */
     }
-
-    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
     ret.task = snext->vcpu;
 
     return ret;
@@ -903,7 +1029,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
     if ( curr_on_cpu(vc->processor) == vc )
        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
     else if ( __vcpu_on_q(svc) )
+    {
         __q_remove(svc);
+        __replq_remove(ops, svc);
+    }
     else if ( svc->flags & RTDS_delayed_runq_add )
         clear_bit(__RTDS_delayed_runq_add, &svc->flags);
 }
@@ -1008,10 +1137,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu * const svc = rt_vcpu(vc);
     s_time_t now = NOW();
-    struct rt_private *prv = rt_priv(ops);
-    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
-    struct rt_dom *sdom = NULL;
-    cpumask_t *online;
+    bool_t missed;
 
     BUG_ON( is_idle_vcpu(vc) );
 
@@ -1033,32 +1159,40 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
     else
         SCHED_STAT_CRANK(vcpu_wake_not_runnable);
 
-    /* If context hasn't been saved for this vcpu yet, we can't put it on
-     * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
-     * put on the Runqueue/DepletedQ after the context has been saved.
+    /*
+     * If a deadline passed while svc was asleep/blocked, we need new
+     * scheduling parameters (a new deadline and full budget).
+     */
+    if ( (missed = now >= svc->cur_deadline) )
+        rt_update_deadline(now, svc);
+
+    /*
+     * If context hasn't been saved for this vcpu yet, we can't put it on
+     * the run-queue/depleted-queue. Instead, we set the appropriate flag,
+     * the vcpu will be put back on queue after the context has been saved
+     * (in rt_context_save()).
      */
     if ( unlikely(svc->flags & RTDS_scheduled) )
     {
-        set_bit(__RTDS_delayed_runq_add, &svc->flags);
+        set_bit(__RTDS_delayed_runq_add, &svc->flags);
+        /*
+         * The vcpu is waking up already, and we didn't even had the time to
+         * remove its next replenishment event from the replenishment queue
+         * when it blocked! No big deal. If we did not miss the deadline in
+         * the meantime, let's just leave it there. If we did, let's remove it
+         * and queue a new one (to occur at our new deadline).
+         */
+        if ( missed )
+            replq_reinsert(ops, svc);
         return;
     }
 
-    if ( now >= svc->cur_deadline)
-        rt_update_deadline(now, svc);
-
+    /* Replenishment event got cancelled when we blocked. Add it back. */
+    __replq_insert(ops, svc);
     /* insert svc to runq/depletedq because svc is not in queue now */
     __runq_insert(ops, svc);
 
-    __repl_update(ops, now);
-
-    ASSERT(!list_empty(&prv->sdom));
-    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-    online = cpupool_domain_cpumask(sdom->dom);
-    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
-
-    runq_tickle(ops, snext);
-
-    return;
+    runq_tickle(ops, svc);
 }
 
 /*
@@ -1069,10 +1203,6 @@ static void
 rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu *svc = rt_vcpu(vc);
-    struct rt_vcpu *snext = NULL;
-    struct rt_dom *sdom = NULL;
-    struct rt_private *prv = rt_priv(ops);
-    cpumask_t *online;
     spinlock_t *lock = vcpu_schedule_lock_irq(vc);
 
     clear_bit(__RTDS_scheduled, &svc->flags);
@@ -1084,15 +1214,11 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
          likely(vcpu_runnable(vc)) )
     {
         __runq_insert(ops, svc);
-        __repl_update(ops, NOW());
-
-        ASSERT(!list_empty(&prv->sdom));
-        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-        online = cpupool_domain_cpumask(sdom->dom);
-        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
-
-        runq_tickle(ops, snext);
+        runq_tickle(ops, svc);
     }
+    else
+        __replq_remove(ops, svc);
+
 out:
     vcpu_schedule_unlock_irq(lock, vc);
 }
@@ -1150,6 +1276,71 @@ rt_dom_cntl(
     return rc;
 }
 
+/*
+ * The replenishment timer handler picks vcpus
+ * from the replq and does the actual replenishment.
+ */
+static void repl_handler(void *data){
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct scheduler *ops = data;
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer *repl_timer = prv->repl_timer;
+    struct list_head *iter, *tmp;
+    struct rt_vcpu *svc = NULL;
+
+    spin_lock_irqsave(&prv->lock, flags);
+
+    stop_timer(repl_timer);
+
+    list_for_each_safe(iter, tmp, replq)
+    {
+        svc = replq_elem(iter);
+
+        if ( now < svc->cur_deadline )
+            break;
+
+        rt_update_deadline(now, svc);
+
+        /*
+         * when the replenishment happens
+         * svc is either on a pcpu or on
+         * runq/depletedq
+         */
+        if( __vcpu_on_q(svc) )
+        {
+            /* put back to runq */
+            __q_remove(svc);
+            __runq_insert(ops, svc);
+        }
+
+        /*
+         * tickle regardless where it's at
+         * because a running vcpu could have
+         * a later deadline than others after
+         * replenishment
+         */
+        runq_tickle(ops, svc);
+
+        /*
+         * update replenishment event queue
+         * without reprogramming the timer
+         */
+        deadline_queue_remove(replq, &svc->replq_elem);
+        deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq);
+    }
+
+    /*
+     * use the vcpu that's on the top
+     * or else don't program the timer
+     */
+    if( !list_empty(replq) )
+        set_timer(repl_timer, replq_elem(replq->next)->cur_deadline);
+
+    spin_unlock_irqrestore(&prv->lock, flags);
+}
+
 static const struct scheduler sched_rtds_def = {
     .name           = "SMP RTDS Scheduler",
     .opt_name       = "rtds",