Message ID | 20241017094315.6948-1-fw@strlen.de (mailing list archive) |
---|---|
State | Awaiting Upstream |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [ipsec] xfrm: migrate: work around 0 if_id on migrate | expand |
On Thu, Oct 17, 2024 at 12:33 PM Florian Westphal <fw@strlen.de> wrote: > > Looks like there are userspace applications which rely on the ability to > migrate xfrm-interface policies while not providing the interface id. > > This worked because old code contains a match workaround: > "if_id == 0 || pol->if_id == if_id". > > The packetpath lookup code uses the id_id as a key into rhashtable; after > switch of migrate lookup over to those functions policy lookup fails. > > Add a workaround: if no policy to migrate is found and userspace provided > no if_id (normal for non-interface policies!) do a full search of all > policies and try to find one that matches the selector. > > This is super-slow, so print a warning when we find a policy, so > hopefully such userspace requests are fixed up over time to not rely on > magic-0-match. > > In case of setups where userspace frequently tries to migrate non-existent > policies with if_id 0 such migrate requests will take much longer to > complete. > > Other options: > 1. also add xfrm interface usage counter so we know in advance if the > slowpath could potentially find the 'right' policy or not. > > 2. Completely revert policy insertion patches and go back to linear > insertion complexity. > > 1) seems premature, I'd expect userspace to try migration of policies it > has inserted in the past, so either normal fastpath or slowpath > should find a match anyway. > > 2) seems like a worse option to me. > > Cc: Nathan Harold <nharold@google.com> > Cc: Maciej Żenczykowski <maze@google.com> > Cc: Steffen Klassert <steffen.klassert@secunet.com> > Fixes: 563d5ca93e88 ("xfrm: switch migrate to xfrm_policy_lookup_bytype") > Reported-by: Yan Yan <evitayan@google.com> > Tested-by: Yan Yan <evitayan@google.com> > Signed-off-by: Florian Westphal <fw@strlen.de> > --- > net/xfrm/xfrm_policy.c | 101 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 101 insertions(+) > > diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c > index d555ea401234..29554173831a 100644 > --- a/net/xfrm/xfrm_policy.c > +++ b/net/xfrm/xfrm_policy.c > @@ -4425,6 +4425,81 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); > #endif > > #ifdef CONFIG_XFRM_MIGRATE > +static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, > + const struct xfrm_selector *sel_tgt) > +{ > + if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { > + if (sel_tgt->family == sel_cmp->family && > + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, > + sel_cmp->family) && > + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, > + sel_cmp->family) && > + sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && > + sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { > + return true; > + } > + } else { > + if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) > + return true; > + } > + return false; > +} > + > +/* Ugly workaround for userspace that wants to migrate policies for > + * xfrm interfaces but does not provide the interface if_id. > + * > + * Old code used to search the lists and handled if_id == 0 as 'does match'. > + * New xfrm_migrate code uses the packet-path lookup which uses the if_id > + * as part of hash key and won't find correct policies. > + * > + * Walk entire policy list to see if there is a matching selector without > + * checking if_id. > + */ > +static u32 xfrm_migrate_policy_find_slow(const struct xfrm_selector *sel, > + u8 dir, u8 type, struct net *net) > +{ > + const struct xfrm_policy *policy, *cand = NULL; > + const struct hlist_head *chain; > + u32 if_id = 0; > + > + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); > + hlist_for_each_entry(policy, chain, bydst) { > + if (policy->type != type) > + continue; > + > + if (xfrm_migrate_selector_match(sel, &policy->selector)) { > + if_id = policy->if_id; > + cand = policy; > + break; > + } > + } > + > + spin_lock_bh(&net->xfrm.xfrm_policy_lock); > + > + list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { > + if (xfrm_policy_is_dead_or_sk(policy)) > + continue; > + > + if (policy->type != type) > + continue; > + > + /* candidate has better priority */ > + if (cand && policy->priority >= cand->priority) > + continue; > + > + if (dir != xfrm_policy_id2dir(policy->index)) > + continue; > + > + if (xfrm_migrate_selector_match(sel, &policy->selector)) { > + if_id = policy->if_id; > + cand = policy; > + } > + } > + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); > + > + return if_id; > +} > + > static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, > u8 dir, u8 type, struct net *net, u32 if_id) > { > @@ -4579,6 +4654,19 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate, > return 0; > } > > +static void xfrm_migrate_warn_workaround(void) > +{ > + char name[sizeof(current->comm)]; > + static bool warned; > + > + if (warned) > + return; > + > + warned = true; > + pr_warn_once("warning: `%s' is migrating xfrm interface policies with if_id 0, this is slow.\n", > + get_task_comm(name, current)); > +} > + > int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, > struct xfrm_migrate *m, int num_migrate, > struct xfrm_kmaddress *k, struct net *net, > @@ -4606,11 +4694,24 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, > /* Stage 1 - find policy */ > pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); > if (IS_ERR_OR_NULL(pol)) { > + if (if_id == 0) { > + if_id = xfrm_migrate_policy_find_slow(sel, dir, type, net); > + > + if (if_id) { > + pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); > + if (!IS_ERR_OR_NULL(pol)) { > + xfrm_migrate_warn_workaround(); > + goto found; > + } > + } > + } > + > NL_SET_ERR_MSG(extack, "Target policy not found"); > err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; > goto out; > } > > +found: > /* Stage 2 - find and update state(s) */ > for (i = 0, mp = m; i < num_migrate; i++, mp++) { > if ((x = xfrm_migrate_state_find(mp, net, if_id))) { > -- > 2.45.2 > Q: Considering the performance impact... would it make sense to hide this behind a sysctl or a kconfig? Yan Yan: Also, while I know we found this issue in Android... do we actually need the fix? Wasn't the adjustment to netd sufficient? Android <16 doesn't support >6.6 anyway, and Android 16 should already have the netd fix...
Maciej Żenczykowski <maze@google.com> wrote: > > +found: > > /* Stage 2 - find and update state(s) */ > > for (i = 0, mp = m; i < num_migrate; i++, mp++) { > > if ((x = xfrm_migrate_state_find(mp, net, if_id))) { > > -- > > 2.45.2 > > > > Q: Considering the performance impact... would it make sense to hide > this behind a sysctl or a kconfig? Kconfig? I don't think so, all distros except Android would turn it on. > Yan Yan: Also, while I know we found this issue in Android... do we > actually need the fix? Wasn't the adjustment to netd sufficient? > Android <16 doesn't support >6.6 anyway, and Android 16 should already > have the netd fix... ... seems you already fixed this, so I suspect this slowpath won't ever run in your case. Following relevant cases exist: 1. Userspace asks to migrate existing policy, provides if_id > 0. -> slowpath is elided. 2. Userspace asks to migrate existing policy, the policy is NOT for xfrm_interface, -> slowpath is also elided because first attempt finds the if_id 0 policy. 3. Like 1, but userspace does not set the if_id. -> slowpath runs, BUT without it migration would not work. 4. Like 2, but the policy doesn't exist. -> slowpath runs and slows things down for no reason. For 1 and 2 even sysctl knob is irrelevant. For 3, sysctl knob is *technically* irrelevant, either migrate is broken (sysctl off) or its on and policy migrate will work. This also hints we'd have to turn such sysctl on by default... For 4, sysctl could be used to disable/avoid such slowdown. But I'm not sure this is a relevant scenario in practice, aside from fuzzers, AND it breaks 3) again if its off. So I don't see a need to provide a config knob or a sysctl that would have to be on by default... If you think a Kconfig knob makes sense for Android sake I can respin with such a knob, but I think I'd have to make it default-y.
On Thu, Oct 17, 2024 at 12:52 PM Florian Westphal <fw@strlen.de> wrote: > > Maciej Żenczykowski <maze@google.com> wrote: > > > +found: > > > /* Stage 2 - find and update state(s) */ > > > for (i = 0, mp = m; i < num_migrate; i++, mp++) { > > > if ((x = xfrm_migrate_state_find(mp, net, if_id))) { > > > -- > > > 2.45.2 > > > > > > > Q: Considering the performance impact... would it make sense to hide > > this behind a sysctl or a kconfig? > > Kconfig? I don't think so, all distros except Android would turn it on. > > > Yan Yan: Also, while I know we found this issue in Android... do we > > actually need the fix? Wasn't the adjustment to netd sufficient? > > Android <16 doesn't support >6.6 anyway, and Android 16 should already > > have the netd fix... > > ... seems you already fixed this, so I suspect this slowpath won't ever > run in your case. > > Following relevant cases exist: > 1. Userspace asks to migrate existing policy, provides if_id > 0. > -> slowpath is elided. > > 2. Userspace asks to migrate existing policy, the policy is NOT for > xfrm_interface, -> slowpath is also elided because first attempt > finds the if_id 0 policy. > > 3. Like 1, but userspace does not set the if_id. > -> slowpath runs, BUT without it migration would not work. > > 4. Like 2, but the policy doesn't exist. > -> slowpath runs and slows things down for no reason. > > For 1 and 2 even sysctl knob is irrelevant. > > For 3, sysctl knob is *technically* irrelevant, either migrate is > broken (sysctl off) or its on and policy migrate will work. > This also hints we'd have to turn such sysctl on by default... > > For 4, sysctl could be used to disable/avoid such slowdown. > But I'm not sure this is a relevant scenario in practice, aside > from fuzzers, AND it breaks 3) again if its off. > > So I don't see a need to provide a config knob or a sysctl > that would have to be on by default... > > If you think a Kconfig knob makes sense for Android sake I can respin > with such a knob, but I think I'd have to make it default-y. I'll trust your judgment. I thought we could maybe eventually deprecate and delete this, but it sounds like that isn't going to be the case...
Maciej Żenczykowski <maze@google.com> wrote: > > If you think a Kconfig knob makes sense for Android sake I can respin > > with such a knob, but I think I'd have to make it default-y. > > I'll trust your judgment. I thought we could maybe eventually > deprecate and delete this, but it sounds like that isn't going to be > the case... We could also say 'android fixed it in userspace' and not fix it in kernel for now until someone else complains. Or modify the pr_warn to also say something like "deprecated and scheduled to be removed in 2026"? I'm not thrilled with this patch :-)
On Thu, Oct 17, 2024 at 1:33 PM Florian Westphal <fw@strlen.de> wrote: > > Maciej Żenczykowski <maze@google.com> wrote: > > > If you think a Kconfig knob makes sense for Android sake I can respin > > > with such a knob, but I think I'd have to make it default-y. > > > > I'll trust your judgment. I thought we could maybe eventually > > deprecate and delete this, but it sounds like that isn't going to be > > the case... > > We could also say 'android fixed it in userspace' and not fix it > in kernel for now until someone else complains. The Android patch is here: https://android-review.googlesource.com/c/platform/system/netd/+/3303667 But I'm not certain if this is enough (though I would assume so). It's so much simpler and more correct than the kernel fix... > Or modify the pr_warn to also say something like > "deprecated and scheduled to be removed in 2026"? > > I'm not thrilled with this patch :-) Let's see what Yan Yan thinks is the right approach. XFRM migration seems like one of those things with very very few users, so it might be easy to just fix them.
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d555ea401234..29554173831a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4425,6 +4425,81 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE +static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, + const struct xfrm_selector *sel_tgt) +{ + if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { + if (sel_tgt->family == sel_cmp->family && + xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) && + xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) && + sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && + sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { + return true; + } + } else { + if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) + return true; + } + return false; +} + +/* Ugly workaround for userspace that wants to migrate policies for + * xfrm interfaces but does not provide the interface if_id. + * + * Old code used to search the lists and handled if_id == 0 as 'does match'. + * New xfrm_migrate code uses the packet-path lookup which uses the if_id + * as part of hash key and won't find correct policies. + * + * Walk entire policy list to see if there is a matching selector without + * checking if_id. + */ +static u32 xfrm_migrate_policy_find_slow(const struct xfrm_selector *sel, + u8 dir, u8 type, struct net *net) +{ + const struct xfrm_policy *policy, *cand = NULL; + const struct hlist_head *chain; + u32 if_id = 0; + + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); + hlist_for_each_entry(policy, chain, bydst) { + if (policy->type != type) + continue; + + if (xfrm_migrate_selector_match(sel, &policy->selector)) { + if_id = policy->if_id; + cand = policy; + break; + } + } + + spin_lock_bh(&net->xfrm.xfrm_policy_lock); + + list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { + if (xfrm_policy_is_dead_or_sk(policy)) + continue; + + if (policy->type != type) + continue; + + /* candidate has better priority */ + if (cand && policy->priority >= cand->priority) + continue; + + if (dir != xfrm_policy_id2dir(policy->index)) + continue; + + if (xfrm_migrate_selector_match(sel, &policy->selector)) { + if_id = policy->if_id; + cand = policy; + } + } + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + + return if_id; +} + static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { @@ -4579,6 +4654,19 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate, return 0; } +static void xfrm_migrate_warn_workaround(void) +{ + char name[sizeof(current->comm)]; + static bool warned; + + if (warned) + return; + + warned = true; + pr_warn_once("warning: `%s' is migrating xfrm interface policies with if_id 0, this is slow.\n", + get_task_comm(name, current)); +} + int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, @@ -4606,11 +4694,24 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); if (IS_ERR_OR_NULL(pol)) { + if (if_id == 0) { + if_id = xfrm_migrate_policy_find_slow(sel, dir, type, net); + + if (if_id) { + pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); + if (!IS_ERR_OR_NULL(pol)) { + xfrm_migrate_warn_workaround(); + goto found; + } + } + } + NL_SET_ERR_MSG(extack, "Target policy not found"); err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; goto out; } +found: /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { if ((x = xfrm_migrate_state_find(mp, net, if_id))) {