Message ID | 1564654971-31328-15-git-send-email-chao.gao@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | improve late microcode loading | expand |
On 01.08.2019 12:22, Chao Gao wrote: > @@ -234,6 +267,35 @@ bool microcode_update_cache(struct microcode_patch *patch) > } > > /* > + * Wait for a condition to be met with a timeout (us). > + */ > +static int wait_for_condition(int (*func)(void *data), void *data, > + unsigned int timeout) > +{ > + while ( !func(data) ) > + { > + if ( !timeout-- ) > + { > + printk("CPU%u: Timeout in %s\n", smp_processor_id(), __func__); I don't think __func__ is helpful here for problem analysis. Instead I think you would want to log either func or __builtin_return_address(0). > @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct microcode_patch *patch) > return err; > } > > -static long do_microcode_update(void *patch) > +static int do_microcode_update(void *patch) > { > - unsigned int cpu; > + unsigned int cpu = smp_processor_id(); > + unsigned int cpu_nr = num_online_cpus(); > + int ret; > > - /* store the patch after a successful loading */ > - if ( !microcode_update_cpu(patch) && patch ) > + /* Mark loading an ucode is in progress */ > + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); Why cmpxchg(), especially when you don't check whether the store has actually happened? (Same further down then.) > + cpumask_set_cpu(cpu, &cpu_callin_map); > + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, > + MICROCODE_CALLIN_TIMEOUT_US); > + if ( ret ) > { > - spin_lock(µcode_mutex); > - microcode_update_cache(patch); > - spin_unlock(µcode_mutex); > - patch = NULL; > + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); > + return ret; > } > > - if ( microcode_ops->end_update ) > - microcode_ops->end_update(); > + /* > + * Load microcode update on only one logical processor per core, or in > + * AMD's term, one core per compute unit. The one with the lowest thread > + * id among all siblings is chosen to perform the loading. 
> + */ > + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) > + { > + static unsigned int panicked = 0; > + bool monitor; > + unsigned int done; > + unsigned long tick = 0; > > - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); > - if ( cpu < nr_cpu_ids ) > - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); > + ret = microcode_ops->apply_microcode(patch); > + if ( !ret ) > + { > + unsigned int cpu2; > > - /* Free the patch if no CPU has loaded it successfully. */ > - if ( patch ) > - microcode_free_patch(patch); > + atomic_inc(&cpu_updated); > + /* Propagate revision number to all siblings */ > + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) > + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; > + } > > - return 0; > + /* > + * The first CPU reaching here will monitor the progress and emit > + * warning message if the duration is too long (e.g. >1 second). > + */ > + monitor = !atomic_inc_return(&cpu_out); > + if ( monitor ) > + tick = rdtsc_ordered(); > + > + /* Waiting for all cores or computing units finishing update */ > + done = atomic_read(&cpu_out); > + while ( panicked && done != nr_cores ) Don't you mean "!panicked" here, or || instead of && ? Otherwise I don't see how the loop would ever be entered. > + { > + /* > + * During each timeout interval, at least a CPU is expected to > + * finish its update. Otherwise, something goes wrong. > + * > + * Note that RDTSC (in wait_for_condition()) is safe for threads to > + * execute while waiting for completion of loading an update. 
> + */ > + if ( wait_for_condition(&wait_cpu_callout, > + (void *)(unsigned long)(done + 1), > + MICROCODE_UPDATE_TIMEOUT_US) && > + !cmpxchg(&panicked, 0, 1) ) > + panic("Timeout when finishing updating microcode (finished %u/%u)", > + done, nr_cores); > + > + /* Print warning message once if long time is spent here */ > + if ( monitor ) > + { > + if ( rdtsc_ordered() - tick >= cpu_khz * 1000 ) > + { > + printk(XENLOG_WARNING "WARNING: UPDATING MICROCODE HAS CONSUMED MORE THAN 1 SECOND!\n"); Please save a little bit on line length by wrapping lines before the initial quote. > + monitor = false; > + } > + } Please combine both if()-s. And please also consider dropping the "monitor" local variable - "tick" being non-zero can easily replace it afaics. > + done = atomic_read(&cpu_out); > + } > + > + /* Mark loading is done to unblock other threads */ > + loading_state = LOADING_EXITED; > + } > + else > + { > + while ( loading_state == LOADING_ENTERED ) > + rep_nop(); Instead of the "for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu))" above, wouldn't it be more logical to have each thread update its signature here? Also - do you perhaps mean cpu_relax() instead of rep_nop()? > @@ -344,19 +481,67 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len) > { > ret = PTR_ERR(patch); > printk(XENLOG_INFO "Parsing microcode blob error %d\n", ret); > - goto free; > + goto put; > } > > if ( !patch ) > { > printk(XENLOG_INFO "No ucode found. 
Update aborted!\n"); > ret = -EINVAL; > - goto free; > + goto put; > + } > + > + cpumask_clear(&cpu_callin_map); > + atomic_set(&cpu_out, 0); > + atomic_set(&cpu_updated, 0); > + loading_state = LOADING_EXITED; > + > + /* Calculate the number of online CPU core */ > + nr_cores = 0; > + for_each_online_cpu(cpu) > + if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) ) > + nr_cores++; > + > + printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores); > + > + /* > + * We intend to disable interrupt for long time, which may lead to > + * watchdog timeout. > + */ > + watchdog_disable(); > + /* > + * Late loading dance. Why the heavy-handed stop_machine effort? > + * > + * - HT siblings must be idle and not execute other code while the other > + * sibling is loading microcode in order to avoid any negative > + * interactions cause by the loading. > + * > + * - In addition, microcode update on the cores must be serialized until > + * this requirement can be relaxed in the future. Right now, this is > + * conservative and good. > + */ > + ret = stop_machine_run(do_microcode_update, patch, NR_CPUS); > + watchdog_enable(); > + > + updated = atomic_read(&cpu_updated); > + if ( updated > 0 ) > + { > + spin_lock(µcode_mutex); > + microcode_update_cache(patch); > + spin_unlock(µcode_mutex); > } > + else > + microcode_free_patch(patch); > > - ret = continue_hypercall_on_cpu(cpumask_first(&cpu_online_map), > - do_microcode_update, patch); > + if ( updated && updated != nr_cores ) > + printk(XENLOG_ERR > + "ERROR: Updating microcode succeeded on %u cores and failed on\n" > + "other %u cores. A system with differing microcode revisions is\n" > + "considered unstable. Please reboot and do not load the microcode\n" > + "that triggers this warning!\n", updated, nr_cores - updated); Note that for such multi-line log messages you need to prefix XENLOG_* to every one of them. Jan
On 01.08.2019 12:22, Chao Gao wrote: > @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct microcode_patch *patch) > return err; > } > > -static long do_microcode_update(void *patch) > +static int do_microcode_update(void *patch) > { > - unsigned int cpu; > + unsigned int cpu = smp_processor_id(); > + unsigned int cpu_nr = num_online_cpus(); > + int ret; > > - /* store the patch after a successful loading */ > - if ( !microcode_update_cpu(patch) && patch ) > + /* Mark loading an ucode is in progress */ > + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); > + cpumask_set_cpu(cpu, &cpu_callin_map); > + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, > + MICROCODE_CALLIN_TIMEOUT_US); One more minor request - just like you do here, ... > + if ( ret ) > { > - spin_lock(µcode_mutex); > - microcode_update_cache(patch); > - spin_unlock(µcode_mutex); > - patch = NULL; > + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); > + return ret; > } > > - if ( microcode_ops->end_update ) > - microcode_ops->end_update(); > + /* > + * Load microcode update on only one logical processor per core, or in > + * AMD's term, one core per compute unit. The one with the lowest thread > + * id among all siblings is chosen to perform the loading. > + */ > + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) > + { > + static unsigned int panicked = 0; > + bool monitor; > + unsigned int done; > + unsigned long tick = 0; > > - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); > - if ( cpu < nr_cpu_ids ) > - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); > + ret = microcode_ops->apply_microcode(patch); > + if ( !ret ) > + { > + unsigned int cpu2; > > - /* Free the patch if no CPU has loaded it successfully. 
*/ > - if ( patch ) > - microcode_free_patch(patch); > + atomic_inc(&cpu_updated); > + /* Propagate revision number to all siblings */ > + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) > + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; > + } > > - return 0; > + /* > + * The first CPU reaching here will monitor the progress and emit > + * warning message if the duration is too long (e.g. >1 second). > + */ > + monitor = !atomic_inc_return(&cpu_out); > + if ( monitor ) > + tick = rdtsc_ordered(); > + > + /* Waiting for all cores or computing units finishing update */ > + done = atomic_read(&cpu_out); > + while ( panicked && done != nr_cores ) > + { > + /* > + * During each timeout interval, at least a CPU is expected to > + * finish its update. Otherwise, something goes wrong. > + * > + * Note that RDTSC (in wait_for_condition()) is safe for threads to > + * execute while waiting for completion of loading an update. > + */ > + if ( wait_for_condition(&wait_cpu_callout, > + (void *)(unsigned long)(done + 1), > + MICROCODE_UPDATE_TIMEOUT_US) && ... could you please omit the redundant & on the first argument here? Jan
On Mon, Aug 05, 2019 at 10:43:16AM +0000, Jan Beulich wrote: >On 01.08.2019 12:22, Chao Gao wrote: >> @@ -234,6 +267,35 @@ bool microcode_update_cache(struct microcode_patch *patch) >> } >> >> /* >> + * Wait for a condition to be met with a timeout (us). >> + */ >> +static int wait_for_condition(int (*func)(void *data), void *data, >> + unsigned int timeout) >> +{ >> + while ( !func(data) ) >> + { >> + if ( !timeout-- ) >> + { >> + printk("CPU%u: Timeout in %s\n", smp_processor_id(), __func__); > >I don't think __func__ is helpful here for problem analysis. Instead >I think you would want to log either func or __builtin_return_address(0). Will do. > >> @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct microcode_patch *patch) >> return err; >> } >> >> -static long do_microcode_update(void *patch) >> +static int do_microcode_update(void *patch) >> { >> - unsigned int cpu; >> + unsigned int cpu = smp_processor_id(); >> + unsigned int cpu_nr = num_online_cpus(); >> + int ret; >> >> - /* store the patch after a successful loading */ >> - if ( !microcode_update_cpu(patch) && patch ) >> + /* Mark loading an ucode is in progress */ >> + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); > >Why cmpxchg(), especially when you don't check whether the store >has actually happened? (Same further down then.) loading_state is set to "LOADING_EXITED" by the caller. So the first CPU coming here would store "LOADING_ENTERED" to it. And we don't need special handling for the CPU that sets the state, so the return value isn't checked. Here are my considerations about how to set the state: 1) We cannot set the state in the caller. Because we would use this state to block #NMI handling. Setting the state in the caller may break stop_machine_run mechanism with the patch 16. 2) The first CPU reaching here should set the state (it means we cannot choose one CPU, e.g. BSP, to do it). With this, #NMI handling is disabled system-wise before any CPU calls in. 
Otherwise, if there is an exception for a CPU, it may be still in #NMI handling, when its sibling thread starts loading an ucode. 3) A simple assignment on every CPU is problematic in some cases. For example, if one CPU comes in after other CPUs timed out and left, it might set the state to "LOADING_ENTERED" and be blocked in #NMI handling forever with patch 16. That's why I choose to use a cmpxhg(). Regarding the cmpxchg() in error-handling below, it can be replaced by a simple assignment. But I am not sure whether it is better to use cmpxchg() considering cache line bouncing. > >> + cpumask_set_cpu(cpu, &cpu_callin_map); >> + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, >> + MICROCODE_CALLIN_TIMEOUT_US); >> + if ( ret ) >> { >> - spin_lock(µcode_mutex); >> - microcode_update_cache(patch); >> - spin_unlock(µcode_mutex); >> - patch = NULL; >> + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); >> + return ret; >> } >> >> - if ( microcode_ops->end_update ) >> - microcode_ops->end_update(); >> + /* >> + * Load microcode update on only one logical processor per core, or in >> + * AMD's term, one core per compute unit. The one with the lowest thread >> + * id among all siblings is chosen to perform the loading. >> + */ >> + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) >> + { >> + static unsigned int panicked = 0; >> + bool monitor; >> + unsigned int done; >> + unsigned long tick = 0; >> >> - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); >> - if ( cpu < nr_cpu_ids ) >> - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); >> + ret = microcode_ops->apply_microcode(patch); >> + if ( !ret ) >> + { >> + unsigned int cpu2; >> >> - /* Free the patch if no CPU has loaded it successfully. 
*/ >> - if ( patch ) >> - microcode_free_patch(patch); >> + atomic_inc(&cpu_updated); >> + /* Propagate revision number to all siblings */ >> + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) >> + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; >> + } >> >> - return 0; >> + /* >> + * The first CPU reaching here will monitor the progress and emit >> + * warning message if the duration is too long (e.g. >1 second). >> + */ >> + monitor = !atomic_inc_return(&cpu_out); >> + if ( monitor ) >> + tick = rdtsc_ordered(); >> + >> + /* Waiting for all cores or computing units finishing update */ >> + done = atomic_read(&cpu_out); >> + while ( panicked && done != nr_cores ) > >Don't you mean "!panicked" here, or || instead of && ? Otherwise I don't >see how the loop would ever be entered. Sorry. It should be !panicked. Other comments are reasonable and I will follow your suggestions. Thanks Chao
On 05.08.2019 15:16, Chao Gao wrote: > On Mon, Aug 05, 2019 at 10:43:16AM +0000, Jan Beulich wrote: >> On 01.08.2019 12:22, Chao Gao wrote: >>> @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct microcode_patch *patch) >>> return err; >>> } >>> >>> -static long do_microcode_update(void *patch) >>> +static int do_microcode_update(void *patch) >>> { >>> - unsigned int cpu; >>> + unsigned int cpu = smp_processor_id(); >>> + unsigned int cpu_nr = num_online_cpus(); >>> + int ret; >>> >>> - /* store the patch after a successful loading */ >>> - if ( !microcode_update_cpu(patch) && patch ) >>> + /* Mark loading an ucode is in progress */ >>> + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); >> >> Why cmpxchg(), especially when you don't check whether the store >> has actually happened? (Same further down then.) > > loading_state is set to "LOADING_EXITED" by the caller. So the first CPU > coming here would store "LOADING_ENTERED" to it. And we don't need > special handling for the CPU that sets the state, so the return value > isn't checked. > > Here are my considerations about how to set the state: > 1) We cannot set the state in the caller. Because we would use this > state to block #NMI handling. Setting the state in the caller may > break stop_machine_run mechanism with the patch 16. > > 2) The first CPU reaching here should set the state (it means we cannot > choose one CPU, e.g. BSP, to do it). With this, #NMI handling is > disabled system-wise before any CPU calls in. Otherwise, if there is > an exception for a CPU, it may be still in #NMI handling, when its > sibling thread starts loading an ucode. > > 3) A simple assignment on every CPU is problematic in some cases. > For example, if one CPU comes in after other CPUs timed out and left, > it might set the state to "LOADING_ENTERED" and be blocked in #NMI > handling forever with patch 16. > > That's why I choose to use a cmpxhg(). 
But if the expected incoming state is _not_ the one actually found in memory, then the cmpxchg() will silently degenerate to a no-op, without anyone noticing other than the system misbehaving in an unpredictable way. On the whole, seeing your explanation above, there is merit to using cmpxchg(). But at the very least this background needs to be put in the description; perhaps better in a code comment next to the variable definition. > Regarding the cmpxchg() in error-handling below, it can be replaced by > a simple assignment. But I am not sure whether it is better to use > cmpxchg() considering cache line bouncing. I didn't think MOV would cause more heavy cacheline bouncing compared to CMPXCHG. >>> + cpumask_set_cpu(cpu, &cpu_callin_map); >>> + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, >>> + MICROCODE_CALLIN_TIMEOUT_US); >>> + if ( ret ) >>> { >>> - spin_lock(µcode_mutex); >>> - microcode_update_cache(patch); >>> - spin_unlock(µcode_mutex); >>> - patch = NULL; >>> + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); >>> + return ret; >>> } >>> >>> - if ( microcode_ops->end_update ) >>> - microcode_ops->end_update(); >>> + /* >>> + * Load microcode update on only one logical processor per core, or in >>> + * AMD's term, one core per compute unit. The one with the lowest thread >>> + * id among all siblings is chosen to perform the loading. >>> + */ >>> + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) >>> + { >>> + static unsigned int panicked = 0; >>> + bool monitor; >>> + unsigned int done; >>> + unsigned long tick = 0; >>> >>> - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); >>> - if ( cpu < nr_cpu_ids ) >>> - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); >>> + ret = microcode_ops->apply_microcode(patch); >>> + if ( !ret ) >>> + { >>> + unsigned int cpu2; >>> >>> - /* Free the patch if no CPU has loaded it successfully. 
*/ >>> - if ( patch ) >>> - microcode_free_patch(patch); >>> + atomic_inc(&cpu_updated); >>> + /* Propagate revision number to all siblings */ >>> + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) >>> + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; >>> + } >>> >>> - return 0; >>> + /* >>> + * The first CPU reaching here will monitor the progress and emit >>> + * warning message if the duration is too long (e.g. >1 second). >>> + */ >>> + monitor = !atomic_inc_return(&cpu_out); >>> + if ( monitor ) >>> + tick = rdtsc_ordered(); >>> + >>> + /* Waiting for all cores or computing units finishing update */ >>> + done = atomic_read(&cpu_out); >>> + while ( panicked && done != nr_cores ) >> >> Don't you mean "!panicked" here, or || instead of && ? Otherwise I don't >> see how the loop would ever be entered. > > Sorry. It should be !panicked. I.e. you mean to exit the loop on all CPUs once one of them has invoked panic()? I did append the alternative for a reason - it seems to me that keeping CPUs in this loop after a panic() might be better. Jan
diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c index cbaf13d..67549be 100644 --- a/xen/arch/x86/microcode.c +++ b/xen/arch/x86/microcode.c @@ -30,18 +30,41 @@ #include <xen/smp.h> #include <xen/softirq.h> #include <xen/spinlock.h> +#include <xen/stop_machine.h> #include <xen/tasklet.h> #include <xen/guest_access.h> #include <xen/earlycpio.h> +#include <xen/watchdog.h> +#include <asm/delay.h> #include <asm/msr.h> #include <asm/processor.h> #include <asm/setup.h> #include <asm/microcode.h> +/* + * Before performing a late microcode update on any thread, we + * rendezvous all cpus in stop_machine context. The timeout for + * waiting for cpu rendezvous is 30ms. It is the timeout used by + * live patching + */ +#define MICROCODE_CALLIN_TIMEOUT_US 30000 + +/* + * Timeout for each thread to complete update is set to 1s. It is a + * conservative choice considering all possible interference. + */ +#define MICROCODE_UPDATE_TIMEOUT_US 1000000 + static module_t __initdata ucode_mod; static signed int __initdata ucode_mod_idx; static bool_t __initdata ucode_mod_forced; +static unsigned int nr_cores; +static enum { + LOADING_EXITED, + LOADING_ENTERED, + LOADING_ABORTED, +} loading_state; /* * If we scan the initramfs.cpio for the early microcode code @@ -190,6 +213,16 @@ static DEFINE_SPINLOCK(microcode_mutex); DEFINE_PER_CPU(struct cpu_signature, cpu_sig); /* + * Count the CPUs that have entered, exited the rendezvous and succeeded in + * microcode update during late microcode update respectively. + * + * Note that a bitmap is used for callin to allow cpu to set a bit multiple + * times. It is required to do busy-loop in #NMI handling. + */ +static cpumask_t cpu_callin_map; +static atomic_t cpu_out, cpu_updated; + +/* * Return a patch that covers current CPU. If there are multiple patches, * return the one with the highest revision number. Return error If no * patch is found and an error occurs during the parsing process. 
Otherwise @@ -234,6 +267,35 @@ bool microcode_update_cache(struct microcode_patch *patch) } /* + * Wait for a condition to be met with a timeout (us). + */ +static int wait_for_condition(int (*func)(void *data), void *data, + unsigned int timeout) +{ + while ( !func(data) ) + { + if ( !timeout-- ) + { + printk("CPU%u: Timeout in %s\n", smp_processor_id(), __func__); + return -EBUSY; + } + udelay(1); + } + + return 0; +} + +static int wait_cpu_callin(void *nr) +{ + return cpumask_weight(&cpu_callin_map) >= (unsigned long)nr; +} + +static int wait_cpu_callout(void *nr) +{ + return atomic_read(&cpu_out) >= (unsigned long)nr; +} + +/* * Load a microcode update to current CPU. * * If no patch is provided, the cached patch will be loaded. Microcode update @@ -283,37 +345,105 @@ static int microcode_update_cpu(const struct microcode_patch *patch) return err; } -static long do_microcode_update(void *patch) +static int do_microcode_update(void *patch) { - unsigned int cpu; + unsigned int cpu = smp_processor_id(); + unsigned int cpu_nr = num_online_cpus(); + int ret; - /* store the patch after a successful loading */ - if ( !microcode_update_cpu(patch) && patch ) + /* Mark loading an ucode is in progress */ + cmpxchg(&loading_state, LOADING_EXITED, LOADING_ENTERED); + cpumask_set_cpu(cpu, &cpu_callin_map); + ret = wait_for_condition(wait_cpu_callin, (void *)(unsigned long)cpu_nr, + MICROCODE_CALLIN_TIMEOUT_US); + if ( ret ) { - spin_lock(&microcode_mutex); - microcode_update_cache(patch); - spin_unlock(&microcode_mutex); - patch = NULL; + cmpxchg(&loading_state, LOADING_ENTERED, LOADING_ABORTED); + return ret; } - if ( microcode_ops->end_update ) - microcode_ops->end_update(); + /* + * Load microcode update on only one logical processor per core, or in + * AMD's term, one core per compute unit. The one with the lowest thread + * id among all siblings is chosen to perform the loading. 
+ */ + if ( (cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu))) ) + { + static unsigned int panicked = 0; + bool monitor; + unsigned int done; + unsigned long tick = 0; - cpu = cpumask_next(smp_processor_id(), &cpu_online_map); - if ( cpu < nr_cpu_ids ) - return continue_hypercall_on_cpu(cpu, do_microcode_update, patch); + ret = microcode_ops->apply_microcode(patch); + if ( !ret ) + { + unsigned int cpu2; - /* Free the patch if no CPU has loaded it successfully. */ - if ( patch ) - microcode_free_patch(patch); + atomic_inc(&cpu_updated); + /* Propagate revision number to all siblings */ + for_each_cpu(cpu2, per_cpu(cpu_sibling_mask, cpu)) + per_cpu(cpu_sig, cpu2).rev = this_cpu(cpu_sig).rev; + } - return 0; + /* + * The first CPU reaching here will monitor the progress and emit + * warning message if the duration is too long (e.g. >1 second). + */ + monitor = !atomic_inc_return(&cpu_out); + if ( monitor ) + tick = rdtsc_ordered(); + + /* Waiting for all cores or computing units finishing update */ + done = atomic_read(&cpu_out); + while ( panicked && done != nr_cores ) + { + /* + * During each timeout interval, at least a CPU is expected to + * finish its update. Otherwise, something goes wrong. + * + * Note that RDTSC (in wait_for_condition()) is safe for threads to + * execute while waiting for completion of loading an update. 
+ */ + if ( wait_for_condition(&wait_cpu_callout, + (void *)(unsigned long)(done + 1), + MICROCODE_UPDATE_TIMEOUT_US) && + !cmpxchg(&panicked, 0, 1) ) + panic("Timeout when finishing updating microcode (finished %u/%u)", + done, nr_cores); + + /* Print warning message once if long time is spent here */ + if ( monitor ) + { + if ( rdtsc_ordered() - tick >= cpu_khz * 1000 ) + { + printk(XENLOG_WARNING "WARNING: UPDATING MICROCODE HAS CONSUMED MORE THAN 1 SECOND!\n"); + monitor = false; + } + } + + done = atomic_read(&cpu_out); + } + + /* Mark loading is done to unblock other threads */ + loading_state = LOADING_EXITED; + } + else + { + while ( loading_state == LOADING_ENTERED ) + rep_nop(); + } + + if ( microcode_ops->end_update ) + microcode_ops->end_update(); + + return ret; } int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len) { int ret; void *buffer; + unsigned int cpu, updated; struct microcode_patch *patch; if ( len != (uint32_t)len ) @@ -332,11 +462,18 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len) goto free; } + /* cpu_online_map must not change during update */ + if ( !get_cpu_maps() ) + { + ret = -EBUSY; + goto free; + } + if ( microcode_ops->start_update ) { ret = microcode_ops->start_update(); if ( ret != 0 ) - goto free; + goto put; } patch = microcode_parse_blob(buffer, len); @@ -344,19 +481,67 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len) { ret = PTR_ERR(patch); printk(XENLOG_INFO "Parsing microcode blob error %d\n", ret); - goto free; + goto put; } if ( !patch ) { printk(XENLOG_INFO "No ucode found. 
Update aborted!\n"); ret = -EINVAL; - goto free; + goto put; + } + + cpumask_clear(&cpu_callin_map); + atomic_set(&cpu_out, 0); + atomic_set(&cpu_updated, 0); + loading_state = LOADING_EXITED; + + /* Calculate the number of online CPU core */ + nr_cores = 0; + for_each_online_cpu(cpu) + if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) ) + nr_cores++; + + printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores); + + /* + * We intend to disable interrupt for long time, which may lead to + * watchdog timeout. + */ + watchdog_disable(); + /* + * Late loading dance. Why the heavy-handed stop_machine effort? + * + * - HT siblings must be idle and not execute other code while the other + * sibling is loading microcode in order to avoid any negative + * interactions cause by the loading. + * + * - In addition, microcode update on the cores must be serialized until + * this requirement can be relaxed in the future. Right now, this is + * conservative and good. + */ + ret = stop_machine_run(do_microcode_update, patch, NR_CPUS); + watchdog_enable(); + + updated = atomic_read(&cpu_updated); + if ( updated > 0 ) + { + spin_lock(&microcode_mutex); + microcode_update_cache(patch); + spin_unlock(&microcode_mutex); } + else + microcode_free_patch(patch); - ret = continue_hypercall_on_cpu(cpumask_first(&cpu_online_map), - do_microcode_update, patch); + if ( updated && updated != nr_cores ) + printk(XENLOG_ERR + "ERROR: Updating microcode succeeded on %u cores and failed on\n" + "other %u cores. A system with differing microcode revisions is\n" + "considered unstable. Please reboot and do not load the microcode\n" + "that triggers this warning!\n", updated, nr_cores - updated); + put: + put_cpu_maps(); free: xfree(buffer); return ret;