From patchwork Fri Oct 28 16:48:08 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Russell King (Oracle)" X-Patchwork-Id: 13024058 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from bombadil.infradead.org (bombadil.infradead.org [198.137.202.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id C3066ECAAA1 for ; Fri, 28 Oct 2022 16:52:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=lists.infradead.org; s=bombadil.20210309; h=Sender: Content-Transfer-Encoding:Content-Type:List-Subscribe:List-Help:List-Post: List-Archive:List-Unsubscribe:List-Id:Date:Message-Id:MIME-Version:Subject:Cc :To:From:References:In-Reply-To:Reply-To:Content-ID:Content-Description: Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID: List-Owner; bh=XAkYtEkXjJLssmNy4lcbdTRcp1O9kFyeNGE6f8kS0oc=; b=PV6LrXhMLoxPNi 46z3P0r6MqHHiMvffGkFNxSSswfYBy6tzCqXNr0FTZcX17Lpem6ubdfEyB7xlDEm4N1SLvOFtFeuw EcsVuoiPGfETP5uCJ/a7vAwhor0vh1luJrfIfXl4sGpnfG+ycICCjgK4+PILHvI1aHoi00adcwyPM 1Mp5HsZfvDKztF+LJ6AnRm0OLmRDgrBmANrXUtDP5m8648wM4O1BujSHHCc9gFSI6j3wFfkktDj50 ga4UetqMoefkdNYfSoo7EUbhO+F5l2y2EuUeTq5scrJ6GoRR0anbYVEtNEhr84NneUUu+LQcT77gj MqiDn1ZzyXh+C/w7Ss7A==; Received: from localhost ([::1] helo=bombadil.infradead.org) by bombadil.infradead.org with esmtp (Exim 4.94.2 #2 (Red Hat Linux)) id 1ooSZD-0011yr-Hf; Fri, 28 Oct 2022 16:50:47 +0000 Received: from pandora.armlinux.org.uk ([2001:4d48:ad52:32c8:5054:ff:fe00:142]) by bombadil.infradead.org with esmtps (Exim 4.94.2 #2 (Red Hat Linux)) id 1ooSYg-00114U-Mm for linux-arm-kernel@lists.infradead.org; Fri, 28 Oct 2022 16:50:16 +0000 DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=armlinux.org.uk; s=pandora-2019; h=Date:Sender:Message-Id:Content-Type: Content-Transfer-Encoding:MIME-Version:Subject:Cc:To:From:References: In-Reply-To:Reply-To:Content-ID:Content-Description:Resent-Date:Resent-From: Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID:List-Id:List-Help: List-Unsubscribe:List-Subscribe:List-Post:List-Owner:List-Archive; bh=awLfhDYQKzr/5OLyGPM7CZguAgBNbZFtXaI32A3EvdI=; b=CO64jMczJw9bQw0Y3YstvDrriM M+W8fedhsaHxOfUSNWbJRsCBF9rAcxapIXf0Q+k955dpSOJDszQvU5HVXUWsWrkqzWJPK/g+/1k1i Z/3VObBwbMlz4Lj+7cCoKw3nsxnl7kWr15DLtET6C/fi3XOKJMgvZy5cHD1dz6pPzhukAgG/TZBMO x8EinmXLmk2YovZTrFKXEMB1CwzP1DSWAKLqosYnUJ4EBn10Efc57gXrVpn2e2HqO12vEjSyeu3zt Gfl/JjOzUvA9f27nbL2i0gtTAz5259q+dubis4/J1EQZQqAkFhxMBc7SgruCIV5YHCukHdTjHpZAJ BB0OGYoA==; Received: from e0022681537dd.dyn.armlinux.org.uk ([fd8f:7570:feb6:1:222:68ff:fe15:37dd]:33794 helo=rmk-PC.armlinux.org.uk) by pandora.armlinux.org.uk with esmtpsa (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.94.2) (envelope-from ) id 1ooSWf-000073-2Y; Fri, 28 Oct 2022 17:48:09 +0100 Received: from rmk by rmk-PC.armlinux.org.uk with local (Exim 4.94.2) (envelope-from ) id 1ooSWe-000FEG-G7; Fri, 28 Oct 2022 17:48:08 +0100 In-Reply-To: References: From: "Russell King (Oracle)" To: Yury Norov Cc: Catalin Marinas , Mark Rutland , Will Deacon , Linux Kernel Mailing List , Linus Torvalds , linux-arm-kernel@lists.infradead.org Subject: [PATCH 4/5] ARM: findbit: operate by words MIME-Version: 1.0 Content-Disposition: inline Message-Id: Date: Fri, 28 Oct 2022 17:48:08 +0100 X-CRM114-Version: 20100106-BlameMichelson ( TRE 0.8.0 (BSD) ) MR-646709E3 X-CRM114-CacheID: sfid-20221028_095014_791262_85861A19 X-CRM114-Status: GOOD ( 12.20 ) X-BeenThere: linux-arm-kernel@lists.infradead.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: "linux-arm-kernel" Errors-To: linux-arm-kernel-bounces+linux-arm-kernel=archiver.kernel.org@lists.infradead.org Convert the implementations to operate on words rather than bytes which makes bitmap searching faster. Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/assembler.h | 6 +++ arch/arm/lib/findbit.S | 78 ++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 90fbe4a3f9c8..28e18f79c300 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endif .endm + .if __LINUX_ARM_ARCH__ < 6 + .set .Lrev_l_uses_tmp, 1 + .else + .set .Lrev_l_uses_tmp, 0 + .endif + /* * bl_r - branch and link to register * diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 8280f66d38a5..6ec584d16d46 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -14,32 +14,32 @@ #include .text +#ifdef __ARMEB__ +#define SWAB_ENDIAN le +#else +#define SWAB_ENDIAN be +#endif + .macro find_first, endian, set, name ENTRY(_find_first_\name\()bit_\endian) teq r1, #0 beq 3f mov r2, #0 -1: - .ifc \endian, be - eor r3, r2, #0x18 - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) +1: ldr r3, [r0], #4 + .ifeq \set + mvns r3, r3 @ invert/test bits .else - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 @ test bits .endif - .ifeq \set - eors r3, r3, #0xff @ invert bits + .ifc \endian, SWAB_ENDIAN + bne .L_found_swab .else - movs r3, r3 + bne .L_found @ found the bit? .endif - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer + add r2, r2, #32 @ next index 2: cmp r2, r1 @ any more? blo 1b -3: mov r0, r1 @ no free bits +3: mov r0, r1 @ no more bits ret lr ENDPROC(_find_first_\name\()bit_\endian) .endm @@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian) ENTRY(_find_next_\name\()bit_\endian) cmp r2, r1 bhs 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - .ifc \endian, be - eor r3, r2, #0x18 - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - .else - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) + mov ip, r2, lsr #5 @ word index + add r0, r0, ip, lsl #2 + ands ip, r2, #31 @ bit position + beq 1b + ldr r3, [r0], #4 + .ifeq \set + mvn r3, r3 @ invert bits + .endif + .ifc \endian, SWAB_ENDIAN + rev_l r3, ip + .if .Lrev_l_uses_tmp + @ we need to recompute ip because rev_l will have overwritten + @ it. + and ip, r2, #31 @ bit position .endif - .ifeq \set - eor r3, r3, #0xff @ now looking for a 1 bit .endif movs r3, r3, lsr ip @ shift off unused bits bne .L_found - orr r2, r2, #7 @ if zero, then no bits here + orr r2, r2, #31 @ no zero bits add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_\name\()bit_\endian) @@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian) /* * One or more bits in the LSB of r3 are assumed to be set. */ +.L_found_swab: + rev_l r3, ip .L_found: #if __LINUX_ARM_ARCH__ >= 7 rbit r3, r3 @ reverse bits @@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian) rsb r3, r3, #31 @ offset of first set bit add r0, r2, r3 @ add offset of first set bit #else - tst r3, #0x0f + mov ip, #~0 + tst r3, ip, lsr #16 @ test bits 0-15 + addeq r2, r2, #16 + moveq r3, r3, lsr #16 + tst r3, #0x00ff + addeq r2, r2, #8 + moveq r3, r3, lsr #8 + tst r3, #0x000f addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 + moveq r3, r3, lsr #4 + tst r3, #0x0003 addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 + moveq r3, r3, lsr #2 + tst r3, #0x0001 addeq r2, r2, #1 mov r0, r2 #endif