diff mbox series

[RFC] regmap: maple: Switch to use irq-safe locking

Message ID 20240814-regcache-maple-irq-safe-v1-1-1b454c5767de@collabora.com (mailing list archive)
State New
Headers show
Series [RFC] regmap: maple: Switch to use irq-safe locking | expand

Commit Message

Cristian Ciocaltea Aug. 13, 2024, 10:20 p.m. UTC
Commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
register cache") enabled the use of maple tree register cache in
Rockchip VOP2 driver.  However, building the kernel with lockdep support
indicates locking rules violation when trying to unload the rockchipdrm
module:

[ 48.360258] ========================================================
[ 48.360829] WARNING: possible irq lock inversion dependency detected
[ 48.361400] 6.11.0-rc1 #40 Not tainted
[ 48.361743] --------------------------------------------------------
[ 48.362311] modprobe/685 just changed the state of lock:
[ 48.362790] ffff0000087fa798 (&mt->ma_lock){+...}-{2:2}, at: regcache_maple_exit+0x6c/0xe0
[ 48.363554] but this lock was taken by another, HARDIRQ-safe lock in the past:
[ 48.364212]  (rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock){-.-.}-{2:2}
[ 48.364226]

             and interrupts could create inverse lock ordering between them.

[ 48.365874]
             other info that might help us debug this:
[ 48.366460]  Possible interrupt unsafe locking scenario:

[ 48.367069]        CPU0                    CPU1
[ 48.367478]        ----                    ----
[ 48.367889]   lock(&mt->ma_lock);
[ 48.368197]                                local_irq_disable();
[ 48.368729]                                lock(rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock);
[ 48.369551]                                lock(&mt->ma_lock);
[ 48.370081]   <Interrupt>
[ 48.370336]     lock(rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock);
[ 48.370957]
                *** DEADLOCK ***

[ 48.371489] 2 locks held by modprobe/685:
[ 48.371854]  #0: ffff0000018898f8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x54/0x210
[ 48.372739]  #1: ffff800081c6ca80 (component_mutex){+.+.}-{3:3}, at: component_del+0x38/0x158
[ 48.373522]
               the shortest dependencies between 2nd lock and 1st lock:
[ 48.374235]  -> (rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock){-.-.}-{2:2} {
[ 48.374941]     IN-HARDIRQ-W at:
[ 48.375239]                       lock_acquire+0x1d4/0x320
[ 48.375739]                       _raw_spin_lock_irqsave+0x6c/0x98
[ 48.376300]                       regmap_lock_spinlock+0x20/0x40
[ 48.376845]                       regmap_read+0x44/0x88
[ 48.377321]                       vop2_isr+0x90/0x290 [rockchipdrm]
[ 48.377919]                       __handle_irq_event_percpu+0x114/0x2b0
[ 48.378519]                       handle_irq_event+0x54/0xb8
[ 48.379032]                       handle_fasteoi_irq+0x158/0x228
[ 48.379577]                       generic_handle_domain_irq+0x34/0x58
[ 48.380160]                       gic_handle_irq+0xa4/0x114

[...]

[ 48.466666] -> (&mt->ma_lock){+...}-{2:2} {
[ 48.467066]    HARDIRQ-ON-W at:
[ 48.467360]                     lock_acquire+0x1d4/0x320
[ 48.467849]                     _raw_spin_lock+0x50/0x70
[ 48.468337]                     regcache_maple_exit+0x6c/0xe0
[ 48.468864]                     regcache_exit+0x8c/0xa8
[ 48.469344]                     regmap_exit+0x24/0x160
[ 48.469815]                     devm_regmap_release+0x1c/0x28
[ 48.470339]                     release_nodes+0x68/0xa8
[ 48.470818]                     devres_release_group+0x120/0x180
[ 48.471364]                     component_unbind+0x54/0x70
[ 48.471867]                     component_unbind_all+0xb0/0xe8
[ 48.472400]                     rockchip_drm_unbind+0x44/0x80 [rockchipdrm]
[ 48.473059]                     component_del+0xc8/0x158
[ 48.473545]                     dw_hdmi_rockchip_remove+0x28/0x40 [rockchipdrm]

The problem is that the regmap lock could be taken by an IRQ context,
interrupting the irq-unsafe maple tree lock, which may result in a lock
inversion deadlock scenario.

Switch to use irq-safe locking in the maple tree register cache.

Fixes: f033c26de5a5 ("regmap: Add maple tree based register cache")
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 drivers/base/regmap/regcache-maple.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)


---
base-commit: 9e6869691724b12e1f43655eeedc35fade38120c
change-id: 20240814-regcache-maple-irq-safe-1e93ffa4e146

Comments

Greg KH Aug. 14, 2024, 4:25 a.m. UTC | #1
On Wed, Aug 14, 2024 at 01:20:21AM +0300, Cristian Ciocaltea wrote:
> Commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
> register cache") enabled the use of maple tree register cache in
> Rockchip VOP2 driver.  However, building the kernel with lockdep support
> indicates locking rules violation when trying to unload the rockchipdrm
> module:
> 
> [ 48.360258] ========================================================
> [ 48.360829] WARNING: possible irq lock inversion dependency detected
> [ 48.361400] 6.11.0-rc1 #40 Not tainted
> [ 48.361743] --------------------------------------------------------
> [ 48.362311] modprobe/685 just changed the state of lock:
> [ 48.362790] ffff0000087fa798 (&mt->ma_lock){+...}-{2:2}, at: regcache_maple_exit+0x6c/0xe0
> [ 48.363554] but this lock was taken by another, HARDIRQ-safe lock in the past:
> [ 48.364212]  (rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock){-.-.}-{2:2}
> [ 48.364226]
> 
>              and interrupts could create inverse lock ordering between them.
> 
> [ 48.365874]
>              other info that might help us debug this:
> [ 48.366460]  Possible interrupt unsafe locking scenario:
> 
> [ 48.367069]        CPU0                    CPU1
> [ 48.367478]        ----                    ----
> [ 48.367889]   lock(&mt->ma_lock);
> [ 48.368197]                                local_irq_disable();
> [ 48.368729]                                lock(rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock);
> [ 48.369551]                                lock(&mt->ma_lock);
> [ 48.370081]   <Interrupt>
> [ 48.370336]     lock(rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock);
> [ 48.370957]
>                 *** DEADLOCK ***
> 
> [ 48.371489] 2 locks held by modprobe/685:
> [ 48.371854]  #0: ffff0000018898f8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x54/0x210
> [ 48.372739]  #1: ffff800081c6ca80 (component_mutex){+.+.}-{3:3}, at: component_del+0x38/0x158
> [ 48.373522]
>                the shortest dependencies between 2nd lock and 1st lock:
> [ 48.374235]  -> (rockchip_drm_vop2:3114:(&vop2_regmap_config)->lock){-.-.}-{2:2} {
> [ 48.374941]     IN-HARDIRQ-W at:
> [ 48.375239]                       lock_acquire+0x1d4/0x320
> [ 48.375739]                       _raw_spin_lock_irqsave+0x6c/0x98
> [ 48.376300]                       regmap_lock_spinlock+0x20/0x40
> [ 48.376845]                       regmap_read+0x44/0x88
> [ 48.377321]                       vop2_isr+0x90/0x290 [rockchipdrm]
> [ 48.377919]                       __handle_irq_event_percpu+0x114/0x2b0
> [ 48.378519]                       handle_irq_event+0x54/0xb8
> [ 48.379032]                       handle_fasteoi_irq+0x158/0x228
> [ 48.379577]                       generic_handle_domain_irq+0x34/0x58
> [ 48.380160]                       gic_handle_irq+0xa4/0x114
> 
> [...]
> 
> [ 48.466666] -> (&mt->ma_lock){+...}-{2:2} {
> [ 48.467066]    HARDIRQ-ON-W at:
> [ 48.467360]                     lock_acquire+0x1d4/0x320
> [ 48.467849]                     _raw_spin_lock+0x50/0x70
> [ 48.468337]                     regcache_maple_exit+0x6c/0xe0
> [ 48.468864]                     regcache_exit+0x8c/0xa8
> [ 48.469344]                     regmap_exit+0x24/0x160
> [ 48.469815]                     devm_regmap_release+0x1c/0x28
> [ 48.470339]                     release_nodes+0x68/0xa8
> [ 48.470818]                     devres_release_group+0x120/0x180
> [ 48.471364]                     component_unbind+0x54/0x70
> [ 48.471867]                     component_unbind_all+0xb0/0xe8
> [ 48.472400]                     rockchip_drm_unbind+0x44/0x80 [rockchipdrm]
> [ 48.473059]                     component_del+0xc8/0x158
> [ 48.473545]                     dw_hdmi_rockchip_remove+0x28/0x40 [rockchipdrm]
> 
> The problem is that the regmap lock could be taken by an IRQ context,
> interrupting the irq-unsafe maple tree lock, which may result in a lock
> inversion deadlock scenario.
> 
> Switch to use irq-safe locking in the maple tree register cache.
> 
> Fixes: f033c26de5a5 ("regmap: Add maple tree based register cache")
> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
> ---
>  drivers/base/regmap/regcache-maple.c | 29 ++++++++++++++++++-----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- You have marked a patch with a "Fixes:" tag for a commit that is in an
  older released kernel, yet you do not have a cc: stable line in the
  signed-off-by area at all, which means that the patch will not be
  applied to any older kernel releases.  To properly fix this, please
  follow the documented rules in the
  Documentation/process/stable-kernel-rules.rst file for how to resolve
  this.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot
Mark Brown Aug. 14, 2024, 7:04 p.m. UTC | #2
On Wed, Aug 14, 2024 at 01:20:21AM +0300, Cristian Ciocaltea wrote:

> Commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
> register cache") enabled the use of maple tree register cache in
> Rockchip VOP2 driver.  However, building the kernel with lockdep support
> indicates locking rules violation when trying to unload the rockchipdrm
> module:

> [ 48.360258] ========================================================
> [ 48.360829] WARNING: possible irq lock inversion dependency detected
> [ 48.361400] 6.11.0-rc1 #40 Not tainted
> [ 48.361743] --------------------------------------------------------
> [ 48.362311] modprobe/685 just changed the state of lock:
> [ 48.362790] ffff0000087fa798 (&mt->ma_lock){+...}-{2:2}, at: regcache_maple_exit+0x6c/0xe0

Please think hard before including complete backtraces in upstream
reports, they are very large and contain almost no useful information
relative to their size so often obscure the relevant content in your
message. If part of the backtrace is usefully illustrative (it often is
for search engines if nothing else) then it's usually better to pull out
the relevant sections.

> The problem is that the regmap lock could be taken by an IRQ context,
> interrupting the irq-unsafe maple tree lock, which may result in a lock
> inversion deadlock scenario.

> Switch to use irq-safe locking in the maple tree register cache.

I'd have a bigger question here which is why the driver is using a
dynamically allocated register cache in a hardirq context, especially
with no defaults provided?  Anything except the flat cache might do
allocations at runtime which might include in interrupt context unless
the caller is very careful and since the lockdep warning triggered it's
clear that this driver isn't.  The core will be doing atomic allocations
for MMIO but that's not something we want to be doing as a matter of
course...  I would generally expect drivers to try to ensure that any
registers are cached outside of the interrupt handler, usually by
specifying defaults or touching all registers during setup.

Without having done a full analysis it also looks like the marking of
volatile registers isn't right, it's not immediately clear that the
interrupt status and clear registers are volatile and they ought to be.
None of the registers accessed in interrupt context look like they
should be cached at all unless there's something triggered via the DRM
vblank calls.

It might be safer to fall back to the rbtree cache for this device since
rbtree doesn't force an extra level of locking on us, though like I say
I'm not convinced that what the driver is doing with caching is a super
good idea.  Though probably what the driver is doing should work.

> Fixes: f033c26de5a5 ("regmap: Add maple tree based register cache")

This is obvious nonsense.  If anything it'd be the conversion to maple
tree, though there were issues before then as it was a conversion from
rbtree that was what added the extra locking.  Please only add Fixes
tags if there's a clear link between the issue and the commit being
pointed at.

> +#define mas_lock_irq(mas, flags) spin_lock_irqsave(&((mas)->tree->ma_lock), flags)
> +#define mas_unlock_irq(mas, flags) spin_unlock_irqrestore(&((mas)->tree->ma_lock), flags)

It's clearly not appropriate to add these outside of the maple tree
code, especially with this naming - this should be with the other
mas_lock() stuff in the maple tree header so added as a separate commit.
It also doesn't seem like a super good idea to unconditionally force all
maple tree users to save interrupt state whenever they need to do
allocations, the spinlock is a bit heavyweight already and this just
escalates it.

My first thought here is that if we've got a regmap using spinlocks for
the regmap lock and a maple tree cache we should arrange things so that
the maple tree lock is used for the regmap's lock.  That would however
involve some unpleasant abstraction violation, and possibly some macro
fun since we'd need to elide the locking from the cache itself when
using the same lock at the regmap level.  I think that's going to be a
case of choosing the least unpleasant option.
Mark Brown Aug. 14, 2024, 7:25 p.m. UTC | #3
On Wed, Aug 14, 2024 at 08:04:07PM +0100, Mark Brown wrote:

> My first thought here is that if we've got a regmap using spinlocks for
> the regmap lock and a maple tree cache we should arrange things so that
> the maple tree lock is used for the regmap's lock.  That would however
> involve some unpleasant abstraction violation, and possibly some macro
> fun since we'd need to elide the locking from the cache itself when
> using the same lock at the regmap level.  I think that's going to be a
> case of choosing the least unpleasant option.

Actually I think that modulo issues with devices that disable regmap
level locking entirely or use hwspinlocks we can persuade the cache to
always use the regmap level lock, even for mutexes, which might clean up
the code a bit and would avoid the double locking for the common case.
Cristian Ciocaltea Aug. 16, 2024, 8:11 p.m. UTC | #4
On 8/14/24 10:04 PM, Mark Brown wrote:
> On Wed, Aug 14, 2024 at 01:20:21AM +0300, Cristian Ciocaltea wrote:

[...]

> I'd have a bigger question here which is why the driver is using a
> dynamically allocated register cache in a hardirq context, especially
> with no defaults provided?  Anything except the flat cache might do
> allocations at runtime which might include in interrupt context unless
> the caller is very careful and since the lockdep warning triggered it's
> clear that this driver isn't.  The core will be doing atomic allocations
> for MMIO but that's not something we want to be doing as a matter of
> course...  I would generally expect drivers to try to ensure that any
> registers are cached outside of the interrupt handler, usually by
> specifying defaults or touching all registers during setup.
> 
> Without having done a full analysis it also looks like the marking of
> volatile registers isn't right, it's not immediately clear that the
> interrupt status and clear registers are volatile and they ought to be.
> None of the registers accessed in interrupt context look like they
> should be cached at all unless there's something triggered via the DRM
> vblank calls.

AFAICT, all registers accessed in IRQ context are volatile, hence the
register cache should not be involved at that point.

The deadlock scenario indicated by lockdep actually points to the lock
acquired by regcache_maple_exit(), which has been triggered during module
unload operation, and the lock acquired by regcache_maple_write(), in the
context of vop2_plane_atomic_update() called within the DRM stack.

[   48.466666] -> (&mt->ma_lock){+...}-{2:2} {
[   48.467066]    HARDIRQ-ON-W at:
[   48.467360]                     lock_acquire+0x1d4/0x320
[   48.467849]                     _raw_spin_lock+0x50/0x70
[   48.468337]                     regcache_maple_exit+0x6c/0xe0
[   48.468864]                     regcache_exit+0x8c/0xa8
[   48.469344]                     regmap_exit+0x24/0x160
[   48.469815]                     devm_regmap_release+0x1c/0x28
[   48.470339]                     release_nodes+0x68/0xa8
[   48.470818]                     devres_release_group+0x120/0x180
[   48.471364]                     component_unbind+0x54/0x70
[   48.471867]                     component_unbind_all+0xb0/0xe8
[   48.472400]                     rockchip_drm_unbind+0x44/0x80 [rockchipdrm]
[   48.473059]                     component_del+0xc8/0x158
[   48.473545]                     dw_hdmi_rockchip_remove+0x28/0x40 [rockchipdrm]

[...]

[   48.482058]    INITIAL USE at:
[   48.482344]                    lock_acquire+0x1d4/0x320
[   48.482824]                    _raw_spin_lock+0x50/0x70
[   48.483304]                    regcache_maple_write+0x27c/0x330
[   48.483844]                    regcache_write+0x6c/0x88
[   48.484323]                    _regmap_read+0x198/0x1c8
[   48.484801]                    _regmap_update_bits+0xc0/0x148
[   48.485327]                    regmap_field_update_bits_base+0x74/0xb0
[   48.485919]                    vop2_plane_atomic_update+0x9e8/0x1490 [rockchipdrm]
[   48.486631]                    drm_atomic_helper_commit_planes+0x190/0x2f8 [drm_kms_helper]

I experimented with a reduced scope of this patch by limiting the use of
the irq-safe lock to regcache_maple_exit() only, and I can confirm this 
was enough to make lockdep happy.

> It might be safer to fall back to the rbtree cache for this device since
> rbtree doesn't force an extra level of locking on us, though like I say
> I'm not convinced that what the driver is doing with caching is a super
> good idea.  Though probably what the driver is doing should work.

I actually gave the flat cache a try on a Rock 3A board and didn't
encounter any (obvious) issues, but my testing capabilities are rather
limited at the moment.

@Andy: Could you please shed some light on the topic, i.e. the rationale
behind going for an rbtree cache over a flat one, since the latter would be
better suited for MMIO devices?
 
> My first thought here is that if we've got a regmap using spinlocks for
> the regmap lock and a maple tree cache we should arrange things so that
> the maple tree lock is used for the regmap's lock.  That would however
> involve some unpleasant abstraction violation, and possibly some macro
> fun since we'd need to elide the locking from the cache itself when
> using the same lock at the regmap level.  I think that's going to be a
> case of choosing the least unpleasant option.

Thanks, Mark, for the detailed feedback on this!

Regards,
Cristian
Mark Brown Aug. 16, 2024, 11:35 p.m. UTC | #5
On Fri, Aug 16, 2024 at 11:11:27PM +0300, Cristian Ciocaltea wrote:

> The deadlock scenario indicated by lockdep actually points to the lock
> acquired by regcache_maple_exit(), which has been triggered during module
> unload operation, and the lock acquired by regcache_maple_write(), in the
> context of vop2_plane_atomic_update() called within the DRM stack.

So still within the interrupt context then, and due to the fact that the
register has not been accessed outside of interrupt context.  Reading or
writing any cached registers used in atomic context on init should
populate the cache and avoid the issue, I expect.  In general, if you're
going to use a sparse cache you should ensure it's populated during init;
like I said in the prior mail, providing defaults is the standard way to
do that, but just an access should also work.
Andy Yan Aug. 19, 2024, 10:18 a.m. UTC | #6
Hi Cristian,

At 2024-08-17 04:11:27, "Cristian Ciocaltea" <cristian.ciocaltea@collabora.com> wrote:
>On 8/14/24 10:04 PM, Mark Brown wrote:
>> On Wed, Aug 14, 2024 at 01:20:21AM +0300, Cristian Ciocaltea wrote:
>
>[...]
>
>> I'd have a bigger question here which is why the driver is using a
>> dynamically allocated register cache in a hardirq context, especially
>> with no defaults provided?  Anything except the flat cache might do
>> allocations at runtime which might include in interrupt context unless
>> the caller is very careful and since the lockdep warning triggered it's
>> clear that this driver isn't.  The core will be doing atomic allocations
>> for MMIO but that's not something we want to be doing as a matter of
>> course...  I would generally expect drivers to try to ensure that any
>> registers are cached outside of the interrupt handler, usually by
>> specifying defaults or touching all registers during setup.
>> 
>> Without having done a full analysis it also looks like the marking of
>> volatile registers isn't right, it's not immediately clear that the
>> interrupt status and clear registers are volatile and they ought to be.
>> None of the registers accessed in interrupt context look like they
>> should be cached at all unless there's something triggered via the DRM
>> vblank calls.
>
>AFAIKT, all registers accessed in IRQ context are volatile, hence the
>register cache should not be involved at that point.
>
>The deadlock scenario indicated by lockdep actually points to the lock
>acquired by regcache_maple_exit(), which has been triggered during module
>unload operation, and the lock acquired by regcache_maple_write(), in the
>context of vop2_plane_atomic_update() called within the DRM stack.
>
>[   48.466666] -> (&mt->ma_lock){+...}-{2:2} {
>[   48.467066]    HARDIRQ-ON-W at:
>[   48.467360]                     lock_acquire+0x1d4/0x320
>[   48.467849]                     _raw_spin_lock+0x50/0x70
>[   48.468337]                     regcache_maple_exit+0x6c/0xe0
>[   48.468864]                     regcache_exit+0x8c/0xa8
>[   48.469344]                     regmap_exit+0x24/0x160
>[   48.469815]                     devm_regmap_release+0x1c/0x28
>[   48.470339]                     release_nodes+0x68/0xa8
>[   48.470818]                     devres_release_group+0x120/0x180
>[   48.471364]                     component_unbind+0x54/0x70
>[   48.471867]                     component_unbind_all+0xb0/0xe8
>[   48.472400]                     rockchip_drm_unbind+0x44/0x80 [rockchipdrm]
>[   48.473059]                     component_del+0xc8/0x158
>[   48.473545]                     dw_hdmi_rockchip_remove+0x28/0x40 [rockchipdrm]
>
>[...]
>
>[   48.482058]    INITIAL USE at:
>[   48.482344]                    lock_acquire+0x1d4/0x320
>[   48.482824]                    _raw_spin_lock+0x50/0x70
>[   48.483304]                    regcache_maple_write+0x27c/0x330
>[   48.483844]                    regcache_write+0x6c/0x88
>[   48.484323]                    _regmap_read+0x198/0x1c8
>[   48.484801]                    _regmap_update_bits+0xc0/0x148
>[   48.485327]                    regmap_field_update_bits_base+0x74/0xb0
>[   48.485919]                    vop2_plane_atomic_update+0x9e8/0x1490 [rockchipdrm]
>[   48.486631]                    drm_atomic_helper_commit_planes+0x190/0x2f8 [drm_kms_helper]
>
>I experimented with a reduced scope of this patch by limiting the use of
>the irq-safe lock to regcache_maple_exit() only, and I can confirm this 
>was enough to make lockdep happy.
>
>> It might be safer to fall back to the rbtree cache for this device since
>> rbtree doesn't force an extra level of locking on us, though like I say
>> I'm not convinced that what the driver is doing with caching is a super
>> good idea.  Though probably what the driver is doing should work.
>
>I actually gave the flat cache a try on a Rock 3A board and didn't
>encounter any (obvious) issues, but my testing capabilities are rather
>limited at the moment.
>
>@Andy: Could you, please, shed some light on the topic? i.e. the rational
>behind going for an rbtree cache over a flat one, since the latter would be
>better suited for MMIO devices.

I encountered a similar issue when I added support for rk3588 [0].

Now I can see this issue when rockchipdrm is loaded with:
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_LOCKDEP=y

But I can't reproduce this issue at unload (with cmd: rmmod rockchipdrm).
I need to take a deeper look to understand the details.


[0]https://patchwork.kernel.org/project/linux-rockchip/patch/20231217084415.2373043-1-andyshrk@163.com/



> 
>> My first thought here is that if we've got a regmap using spinlocks for
>> the regmap lock and a maple tree cache we should arrange things so that
>> the maple tree lock is used for the regmap's lock.  That would however
>> involve some unpleasant abstraction violation, and possibly some macro
>> fun since we'd need to elide the locking from the cache itself when
>> using the same lock at the regmap level.  I think that's going to be a
>> case of choosing the least unpleasant option.
>
>Thanks, Mark, for the detailed feedback on this!
>
>Regards,
>Cristian
>
>_______________________________________________
>Linux-rockchip mailing list
>Linux-rockchip@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-rockchip
Andy Yan Aug. 20, 2024, 6:19 a.m. UTC | #7
Hi Mark and Cristian,
also cc Sasha,

On 8/15/24 03:04, Mark Brown wrote:
> On Wed, Aug 14, 2024 at 01:20:21AM +0300, Cristian Ciocaltea wrote:
> 
>> Commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree
>> register cache") enabled the use of maple tree register cache in
>> Rockchip VOP2 driver.  However, building the kernel with lockdep support
>> indicates locking rules violation when trying to unload the rockchipdrm
>> module:
> 
>> [ 48.360258] ========================================================
>> [ 48.360829] WARNING: possible irq lock inversion dependency detected
>> [ 48.361400] 6.11.0-rc1 #40 Not tainted
>> [ 48.361743] --------------------------------------------------------
>> [ 48.362311] modprobe/685 just changed the state of lock:
>> [ 48.362790] ffff0000087fa798 (&mt->ma_lock){+...}-{2:2}, at: regcache_maple_exit+0x6c/0xe0
> 
> Please think hard before including complete backtraces in upstream
> reports, they are very large and contain almost no useful information
> relative to their size so often obscure the relevant content in your
> message. If part of the backtrace is usefully illustrative (it often is
> for search engines if nothing else) then it's usually better to pull out
> the relevant sections.
> 
>> The problem is that the regmap lock could be taken by an IRQ context,
>> interrupting the irq-unsafe maple tree lock, which may result in a lock
>> inversion deadlock scenario.
> 
>> Switch to use irq-safe locking in the maple tree register cache.
> 
> I'd have a bigger question here which is why the driver is using a
> dynamically allocated register cache in a hardirq context, especially
> with no defaults provided?  Anything except the flat cache might do

Do you mean that the current code calls devm_regmap_init_mmio() in the vop2_bind() function?

> allocations at runtime which might include in interrupt context unless
> the caller is very careful and since the lockdep warning triggered it's
> clear that this driver isn't.  The core will be doing atomic allocations
> for MMIO but that's not something we want to be doing as a matter of
> course...  I would generally expect drivers to try to ensure that any
> registers are cached outside of the interrupt handler, usually by
> specifying defaults or touching all registers during setup.
> 
> Without having done a full analysis it also looks like the marking of
> volatile registers isn't right, it's not immediately clear that the
> interrupt status and clear registers are volatile and they ought to be.
> None of the registers accessed in interrupt context look like they
> should be cached at all unless there's something triggered via the DRM
> vblank calls.
> 

I think the interrupt status and clear registers should also be marked as volatile.
But this is not related to the current issue, right?
> It might be safer to fall back to the rbtree cache for this device since
> rbtree doesn't force an extra level of locking on us, though like I say

I also think falling back to rbtree would work. I had a similar thought the first
time I encountered this issue [0]. But I tried to keep the maple tree, as it is based
on a much more modern data structure.

> I'm not convinced that what the driver is doing with caching is a super
> good idea.  Though probably what the driver is doing should work.
The registers of the VOP are quite special: a write operation to a register
does not take effect immediately; it only takes effect after the next VBLANK if
we write the CFGONE register.
So we need a cache to record what we previously wrote to the registers.

> 


[0]https://patchwork.kernel.org/project/linux-rockchip/patch/20231217084415.2373043-1-andyshrk@163.com/
diff mbox series

Patch

diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 2dea9d259c49..7b2433c9747e 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -13,6 +13,9 @@ 
 
 #include "internal.h"
 
+#define mas_lock_irq(mas, flags)           spin_lock_irqsave(&((mas)->tree->ma_lock), flags)
+#define mas_unlock_irq(mas, flags)         spin_unlock_irqrestore(&((mas)->tree->ma_lock), flags)
+
 static int regcache_maple_read(struct regmap *map,
 			       unsigned int reg, unsigned int *value)
 {
@@ -42,6 +45,7 @@  static int regcache_maple_write(struct regmap *map, unsigned int reg,
 	MA_STATE(mas, mt, reg, reg);
 	unsigned long *entry, *upper, *lower;
 	unsigned long index, last;
+	unsigned long flags;
 	size_t lower_sz, upper_sz;
 	int ret;
 
@@ -89,18 +93,18 @@  static int regcache_maple_write(struct regmap *map, unsigned int reg,
 	 * is redundant, but we need to take it due to lockdep asserts
 	 * in the maple tree code.
 	 */
-	mas_lock(&mas);
+	mas_lock_irq(&mas, flags);
 
 	mas_set_range(&mas, index, last);
 	ret = mas_store_gfp(&mas, entry, map->alloc_flags);
 
-	mas_unlock(&mas);
+	mas_unlock_irq(&mas, flags);
 
 	if (ret == 0) {
 		kfree(lower);
 		kfree(upper);
 	}
-	
+
 	return ret;
 }
 
@@ -113,12 +117,13 @@  static int regcache_maple_drop(struct regmap *map, unsigned int min,
 	/* initialized to work around false-positive -Wuninitialized warning */
 	unsigned long lower_index = 0, lower_last = 0;
 	unsigned long upper_index, upper_last;
+	unsigned long flags;
 	int ret = 0;
 
 	lower = NULL;
 	upper = NULL;
 
-	mas_lock(&mas);
+	mas_lock_irq(&mas, flags);
 
 	mas_for_each(&mas, entry, max) {
 		/*
@@ -126,7 +131,7 @@  static int regcache_maple_drop(struct regmap *map, unsigned int min,
 		 * Maple lock is redundant, but we need to take it due
 		 * to lockdep asserts in the maple tree code.
 		 */
-		mas_unlock(&mas);
+		mas_unlock_irq(&mas, flags);
 
 		/* Do we need to save any of this entry? */
 		if (mas.index < min) {
@@ -156,7 +161,7 @@  static int regcache_maple_drop(struct regmap *map, unsigned int min,
 		}
 
 		kfree(entry);
-		mas_lock(&mas);
+		mas_lock_irq(&mas, flags);
 		mas_erase(&mas);
 
 		/* Insert new nodes with the saved data */
@@ -178,7 +183,7 @@  static int regcache_maple_drop(struct regmap *map, unsigned int min,
 	}
 
 out:
-	mas_unlock(&mas);
+	mas_unlock_irq(&mas, flags);
 out_unlocked:
 	kfree(lower);
 	kfree(upper);
@@ -295,16 +300,17 @@  static int regcache_maple_exit(struct regmap *map)
 	struct maple_tree *mt = map->cache;
 	MA_STATE(mas, mt, 0, UINT_MAX);
 	unsigned int *entry;
+	unsigned long flags;
 
 	/* if we've already been called then just return */
 	if (!mt)
 		return 0;
 
-	mas_lock(&mas);
+	mas_lock_irq(&mas, flags);
 	mas_for_each(&mas, entry, UINT_MAX)
 		kfree(entry);
 	__mt_destroy(mt);
-	mas_unlock(&mas);
+	mas_unlock_irq(&mas, flags);
 
 	kfree(mt);
 	map->cache = NULL;
@@ -318,6 +324,7 @@  static int regcache_maple_insert_block(struct regmap *map, int first,
 	struct maple_tree *mt = map->cache;
 	MA_STATE(mas, mt, first, last);
 	unsigned long *entry;
+	unsigned long flags;
 	int i, ret;
 
 	entry = kcalloc(last - first + 1, sizeof(unsigned long), map->alloc_flags);
@@ -327,13 +334,13 @@  static int regcache_maple_insert_block(struct regmap *map, int first,
 	for (i = 0; i < last - first + 1; i++)
 		entry[i] = map->reg_defaults[first + i].def;
 
-	mas_lock(&mas);
+	mas_lock_irq(&mas, flags);
 
 	mas_set_range(&mas, map->reg_defaults[first].reg,
 		      map->reg_defaults[last].reg);
 	ret = mas_store_gfp(&mas, entry, map->alloc_flags);
 
-	mas_unlock(&mas);
+	mas_unlock_irq(&mas, flags);
 
 	if (ret)
 		kfree(entry);