@@ -537,15 +537,22 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
}
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+ spin_lock_bh(&ipsec->xs->lock);
+ /* Skip dead xfrm states, they'll be freed later. */
+ if (ipsec->xs->km.state == XFRM_STATE_DEAD)
+ goto next;
+
/* If new state is added before ipsec_lock acquired */
if (ipsec->xs->xso.real_dev == real_dev)
- continue;
+ goto next;
ipsec->xs->xso.real_dev = real_dev;
if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
ipsec->xs->xso.real_dev = NULL;
}
+next:
+ spin_unlock_bh(&ipsec->xs->lock);
}
out:
mutex_unlock(&bond->ipsec_lock);
@@ -560,7 +567,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
struct net_device *bond_dev = xs->xso.dev;
struct net_device *real_dev;
netdevice_tracker tracker;
- struct bond_ipsec *ipsec;
struct bonding *bond;
struct slave *slave;
@@ -592,15 +598,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
out:
netdev_put(real_dev, &tracker);
- mutex_lock(&bond->ipsec_lock);
- list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- if (ipsec->xs == xs) {
- list_del(&ipsec->list);
- kfree(ipsec);
- break;
- }
- }
- mutex_unlock(&bond->ipsec_lock);
}
static void bond_ipsec_del_sa_all(struct bonding *bond)
@@ -617,8 +614,18 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
mutex_lock(&bond->ipsec_lock);
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+ spin_lock_bh(&ipsec->xs->lock);
if (!ipsec->xs->xso.real_dev)
- continue;
+ goto next;
+
+ if (ipsec->xs->km.state == XFRM_STATE_DEAD) {
+ /* already dead no need to delete again */
+ if (real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
+ list_del(&ipsec->list);
+ kfree(ipsec);
+ goto next;
+ }
if (!real_dev->xfrmdev_ops ||
!real_dev->xfrmdev_ops->xdo_dev_state_delete ||
@@ -631,6 +638,8 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
if (real_dev->xfrmdev_ops->xdo_dev_state_free)
real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
}
+next:
+ spin_unlock_bh(&ipsec->xs->lock);
}
mutex_unlock(&bond->ipsec_lock);
}
@@ -640,6 +649,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
struct net_device *bond_dev = xs->xso.dev;
struct net_device *real_dev;
netdevice_tracker tracker;
+ struct bond_ipsec *ipsec;
struct bonding *bond;
struct slave *slave;
@@ -659,11 +669,22 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
if (!xs->xso.real_dev)
goto out;
- WARN_ON(xs->xso.real_dev != real_dev);
+ mutex_lock(&bond->ipsec_lock);
+ list_for_each_entry(ipsec, &bond->ipsec_list, list) {
+ if (ipsec->xs == xs) {
+ /* do xdo_dev_state_free if real_dev matches,
+ * otherwise only remove the list
+ */
+ if (real_dev && real_dev->xfrmdev_ops &&
+ real_dev->xfrmdev_ops->xdo_dev_state_free)
+ real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
+ list_del(&ipsec->list);
+ kfree(ipsec);
+ break;
+ }
+ }
+ mutex_unlock(&bond->ipsec_lock);
- if (real_dev && real_dev->xfrmdev_ops &&
- real_dev->xfrmdev_ops->xdo_dev_state_free)
- real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
out:
netdev_put(real_dev, &tracker);
}
The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers a warning: BUG: sleeping function called from invalid context at... Fix this by moving the IPsec deletion operation to bond_ipsec_free_sa, which is not held by spin_lock_bh(). Additionally, delete the IPsec list in bond_ipsec_del_sa_all() when the XFRM state is DEAD to prevent xdo_dev_state_free() from being triggered again in bond_ipsec_free_sa(). For bond_ipsec_free_sa(), there are now three conditions: 1. if (!slave): When no active device exists. 2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails. 3. if (xs->xso.real_dev != real_dev): When an xs has already been freed by bond_ipsec_del_sa_all() due to migration, and the active slave has changed to a new device. At the same time, the xs is marked as DEAD due to the XFRM entry is removed, triggering xfrm_state_gc_task() and bond_ipsec_free_sa(). In all three cases, xdo_dev_state_free() should not be called, only xs should be removed from bond->ipsec list. At the same time, protect bond_ipsec_del_sa_all and bond_ipsec_add_sa_all with x->lock for each xs being processed. This prevents XFRM from concurrently initiating add/delete operations on the managed states. Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex") Reported-by: Jakub Kicinski <kuba@kernel.org> Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org Suggested-by: Cosmin Ratiu <cratiu@nvidia.com> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> --- drivers/net/bonding/bond_main.c | 53 +++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 16 deletions(-)