@@ -168,14 +168,14 @@ static void cf_check xen_evtchn_upcall(void)
while ( pending )
{
- unsigned int l1 = find_first_set_bit(pending);
+ unsigned int l1 = ffsl(pending) - 1;
unsigned long evtchn = xchg(&XEN_shared_info->evtchn_pending[l1], 0);
__clear_bit(l1, &pending);
evtchn &= ~XEN_shared_info->evtchn_mask[l1];
while ( evtchn )
{
- unsigned int port = find_first_set_bit(evtchn);
+ unsigned int port = ffsl(evtchn) - 1;
__clear_bit(port, &evtchn);
port += l1 * BITS_PER_LONG;
@@ -139,7 +139,7 @@ static int __init pvh_populate_memory_range(struct domain *d,
order = get_order_from_pages(end - start + 1);
order = min(order ? order - 1 : 0, max_order);
/* The order allocated and populated must be aligned to the address. */
- order = min(order, start ? find_first_set_bit(start) : MAX_ORDER);
+ order = min(order, start ? ffsl(start) - 1 : MAX_ORDER);
page = alloc_domheap_pages(d, order, dom0_memflags | MEMF_no_scrub);
if ( page == NULL )
{
@@ -335,7 +335,7 @@ static void timer_sanitize_int_route(HPETState *h, unsigned int tn)
* enabled pick the first irq.
*/
timer_config(h, tn) |=
- MASK_INSR(find_first_set_bit(timer_int_route_cap(h, tn)),
+ MASK_INSR(ffsl(timer_int_route_cap(h, tn)) - 1,
HPET_TN_ROUTE);
}
@@ -409,7 +409,7 @@ static int cf_check hpet_write(
{
bool active;
- i = find_first_set_bit(new_val);
+ i = ffsl(new_val) - 1;
if ( i >= HPET_TIMER_NUM )
break;
__clear_bit(i, &new_val);
@@ -535,14 +535,14 @@ static int cf_check hpet_write(
/* stop/start timers whos state was changed by this write. */
while (stop_timers)
{
- i = find_first_set_bit(stop_timers);
+ i = ffsl(stop_timers) - 1;
__clear_bit(i, &stop_timers);
hpet_stop_timer(h, i, guest_time);
}
while (start_timers)
{
- i = find_first_set_bit(start_timers);
+ i = ffsl(start_timers) - 1;
__clear_bit(i, &start_timers);
hpet_set_timer(h, i, guest_time);
}
@@ -60,7 +60,7 @@ static bool pt_update_contig_markers(uint64_t *pt, unsigned int idx,
/* Step 1: Reduce markers in lower numbered entries. */
while ( i )
{
- b = find_first_set_bit(i);
+ b = ffsl(i) - 1;
i &= ~(1U << b);
if ( GET_MARKER(pt[i]) <= b )
break;
@@ -3424,7 +3424,7 @@ static int vcpumask_to_pcpumask(
{
unsigned int cpu;
- vcpu_id = find_first_set_bit(vmask);
+ vcpu_id = ffsl(vmask) - 1;
vmask &= ~(1UL << vcpu_id);
vcpu_id += vcpu_bias;
if ( (vcpu_id >= d->max_vcpus) )
@@ -684,7 +684,7 @@ unsigned long
p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
{
unsigned long left = 1UL << order, ret = 0;
- unsigned int chunk_order = find_first_set_bit(gfn_x(gfn) | left);
+ unsigned int chunk_order = ffsl(gfn_x(gfn) | left) - 1;
do {
ret += decrease_reservation(d, gfn, chunk_order);
@@ -1393,7 +1393,7 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
unsigned int order)
{
unsigned long left = 1UL << order;
- unsigned int chunk_order = find_first_set_bit(gfn | left);
+ unsigned int chunk_order = ffsl(gfn | left) - 1;
int rc;
if ( !paging_mode_translate(d) )
@@ -1819,7 +1819,7 @@ static void _init_heap_pages(const struct page_info *pg,
if ( unlikely(!avail[nid]) )
{
bool use_tail = IS_ALIGNED(s, 1UL << MAX_ORDER) &&
- (find_first_set_bit(e) <= find_first_set_bit(s));
+ (ffsl(e) <= ffsl(s));
unsigned long n;
n = init_node_heap(nid, s, nr_pages, &use_tail);
@@ -48,7 +48,7 @@ static void __do_softirq(unsigned long ignore_mask)
|| cpu_is_offline(cpu) )
break;
- i = find_first_set_bit(pending);
+ i = ffsl(pending) - 1;
clear_bit(i, &softirq_pending(cpu));
(*softirq_handlers[i])();
}
@@ -137,7 +137,7 @@ static void set_iommu_ptes_present(unsigned long pt_mfn,
ASSERT(!pde->u);
if ( pde > table )
- ASSERT(pde->ign0 == find_first_set_bit(pde - table));
+ ASSERT(pde->ign0 == ffsl(pde - table) - 1);
else
ASSERT(pde->ign0 == CONTIG_LEVEL_SHIFT);
@@ -305,7 +305,7 @@ static unsigned int mapping_order(const struct domain_iommu *hd,
{
unsigned long res = dfn_x(dfn) | mfn_x(mfn);
unsigned long sizes = hd->platform_ops->page_sizes;
- unsigned int bit = find_first_set_bit(sizes), order = 0;
+ unsigned int bit = ffsl(sizes) - 1, order = 0;
ASSERT(bit == PAGE_SHIFT);
@@ -313,7 +313,7 @@ static unsigned int mapping_order(const struct domain_iommu *hd,
{
unsigned long mask;
- bit = find_first_set_bit(sizes);
+ bit = ffsl(sizes) - 1;
mask = (1UL << bit) - 1;
if ( nr <= mask || (res & mask) )
break;
@@ -641,7 +641,7 @@ struct page_info *iommu_alloc_pgtable(struct domain_iommu *hd,
if ( contig_mask )
{
/* See pt-contig-markers.h for a description of the marker scheme. */
- unsigned int i, shift = find_first_set_bit(contig_mask);
+ unsigned int i, shift = ffsl(contig_mask) - 1;
ASSERT((CONTIG_LEVEL_SHIFT & (contig_mask >> shift)) == CONTIG_LEVEL_SHIFT);
@@ -652,7 +652,7 @@ struct page_info *iommu_alloc_pgtable(struct domain_iommu *hd,
for ( i = 4; i < PAGE_SIZE / sizeof(*p); i += 4 )
{
- p[i + 0] = (find_first_set_bit(i) + 0ULL) << shift;
+ p[i + 0] = (ffsl(i) - 1ULL) << shift;
p[i + 1] = 0;
p[i + 2] = 1ULL << shift;
p[i + 3] = 0;
find_first_set_bit() is a Xen-ism which has undefined behaviour with a 0
input.  ffsl() is well defined with an input of 0, and is found outside of
Xen too.

_init_heap_pages() is the one special case here, comparing the LSB of two
different addresses.  The -1 cancels off both sides of the expression.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Oleksii Kurochko <oleksii.kurochko@gmail.com>
CC: Shawn Anastasio <sanastasio@raptorengineering.com>
CC: consulting@bugseng.com <consulting@bugseng.com>
CC: Simone Ballarin <simone.ballarin@bugseng.com>
CC: Federico Serafini <federico.serafini@bugseng.com>
CC: Nicola Vetrini <nicola.vetrini@bugseng.com>

v2:
 * Reorder from later in the series to keep ARM bisectable

In an x86 build, we get the following delta:

  add/remove: 0/0 grow/shrink: 2/4 up/down: 39/-52 (-13)
  Function                                     old     new   delta
  hpet_write                                  2183    2206     +23
  init_heap_pages                             1222    1238     +16
  dom0_construct_pvh                          3959    3958      -1
  mapping_order                                139     126     -13
  guest_physmap_mark_populate_on_demand       1301    1285     -16
  vcpumask_to_pcpumask                         525     503     -22

so the optimiser improvements for ffsl() really do speak for themselves.

I'm surprised by the increase in hpet_write(), but looking at the code, it
very clearly wants the same treatment as:

  commit 188fa82305e72b725473db9146e20cc9abf7bff3
  Author: Andrew Cooper <andrew.cooper3@citrix.com>
  Date:   Fri Mar 15 11:31:33 2024

      xen/vpci: Improve code generation in mask_write()

which I'm confident will end up as a net improvement.
---
 xen/arch/x86/guest/xen/xen.c                 | 4 ++--
 xen/arch/x86/hvm/dom0_build.c                | 2 +-
 xen/arch/x86/hvm/hpet.c                      | 8 ++++----
 xen/arch/x86/include/asm/pt-contig-markers.h | 2 +-
 xen/arch/x86/mm.c                            | 2 +-
 xen/arch/x86/mm/p2m-pod.c                    | 4 ++--
 xen/common/page_alloc.c                      | 2 +-
 xen/common/softirq.c                         | 2 +-
 xen/drivers/passthrough/amd/iommu_map.c      | 2 +-
 xen/drivers/passthrough/iommu.c              | 4 ++--
 xen/drivers/passthrough/x86/iommu.c          | 4 ++--
 11 files changed, 18 insertions(+), 18 deletions(-)
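
As a standalone sanity check of the conversion (not part of the patch), here
is a minimal sketch of the assumed semantics.  my_ffsl() and
my_find_first_set_bit() are hypothetical stand-ins for Xen's helpers, built
on the GCC/Clang __builtin_ctzl(): ffsl() returns the 1-based position of the
lowest set bit and 0 for a zero input, while find_first_set_bit() returns the
0-based index and is undefined for 0 - hence the uniform "- 1" in the
conversion, which cancels on both sides of the _init_heap_pages() comparison.

  #include <assert.h>

  /* Hypothetical model of ffsl(): 1-based index of the lowest set bit,
   * 0 when no bit is set. */
  static unsigned int my_ffsl(unsigned long x)
  {
      return x ? (unsigned int)__builtin_ctzl(x) + 1 : 0;
  }

  /* Hypothetical model of find_first_set_bit(): 0-based index of the lowest
   * set bit, undefined behaviour for x == 0. */
  static unsigned int my_find_first_set_bit(unsigned long x)
  {
      return (unsigned int)__builtin_ctzl(x);
  }

  int main(void)
  {
      /* For every non-zero input the two agree modulo the -1 offset. */
      for ( unsigned long x = 1; x < 4096; x++ )
          assert( my_ffsl(x) - 1 == my_find_first_set_bit(x) );

      /* Well defined for 0, unlike the Xen-ism. */
      assert( my_ffsl(0) == 0 );

      return 0;
  }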