Message ID | 20230918125914.21391-3-fw@strlen.de (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | xfrm: policy: replace session decode with flow dissector | expand |
Hello, kernel test robot noticed "WARNING:at_net/core/flow_dissector.c:#__skb_flow_dissect" on: commit: 7a6420ac36c0355d4d370cce83343dcefa58a1c4 ("[PATCH ipsec-next 2/2] xfrm: policy: replace session decode with flow dissector") url: https://github.com/intel-lab-lkp/linux/commits/Florian-Westphal/xfrm-move-mark-and-oif-flowi-decode-into-common-code/20230918-210254 base: https://git.kernel.org/cgit/linux/kernel/git/klassert/ipsec.git master patch link: https://lore.kernel.org/all/20230918125914.21391-3-fw@strlen.de/ patch subject: [PATCH ipsec-next 2/2] xfrm: policy: replace session decode with flow dissector in testcase: kernel-selftests version: kernel-selftests-x86_64-60acb023-1_20230329 with following parameters: group: netfilter test: nft_synproxy.sh compiler: gcc-12 test machine: 36 threads 1 sockets Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz (Cascade Lake) with 32G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202309271628.27fd2187-oliver.sang@intel.com kern :warn : [ 173.147140] ------------[ cut here ]------------ kern :warn : [ 173.147759] WARNING: CPU: 12 PID: 2260 at net/core/flow_dissector.c:1096 __skb_flow_dissect (net/core/flow_dissector.c:1096 (discriminator 1)) kern :warn : [ 173.148709] Modules linked in: nft_synproxy nf_synproxy_core nft_ct nf_tables veth nfnetlink openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 intel_rapl_msr intel_rapl_common nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp btrfs coretemp kvm_intel blake2b_generic xor raid6_pq kvm zstd_compress irqbypass crct10dif_pclmul libcrc32c crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl intel_cstate nvme nvme_core t10_pi crc64_rocksoft_generic crc64_rocksoft ipmi_devintf ahci libahci ipmi_msghandler intel_wmi_thunderbolt wmi_bmof mxm_wmi i2c_i801 wdat_wdt intel_uncore crc64 mei_me i2c_smbus libata ioatdma dca mei wmi binfmt_misc fuse drm ip_tables kern :warn : [ 173.154193] CPU: 12 PID: 2260 Comm: iperf3 Not tainted 6.5.0-04033-g7a6420ac36c0-dirty #1 kern :warn : [ 173.155024] Hardware name: Gigabyte Technology Co., Ltd. 
X299 UD4 Pro/X299 UD4 Pro-CF, BIOS F8a 04/27/2021 kern :warn : [ 173.155977] RIP: 0010:__skb_flow_dissect (net/core/flow_dissector.c:1096 (discriminator 1)) kern :warn : [ 173.156562] Code: b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 e4 39 00 00 4d 8b b6 a0 05 00 00 4d 85 f6 0f 85 9d f9 ff ff <0f> 0b e9 0f fb ff ff 66 81 fb 08 06 0f 84 ca fc ff ff 44 8b bd 00 All code ======== 0: b8 00 00 00 00 mov $0x0,%eax 5: 00 fc add %bh,%ah 7: ff (bad) 8: df 48 89 fisttps -0x77(%rax) b: fa cli c: 48 c1 ea 03 shr $0x3,%rdx 10: 80 3c 02 00 cmpb $0x0,(%rdx,%rax,1) 14: 0f 85 e4 39 00 00 jne 0x39fe 1a: 4d 8b b6 a0 05 00 00 mov 0x5a0(%r14),%r14 21: 4d 85 f6 test %r14,%r14 24: 0f 85 9d f9 ff ff jne 0xfffffffffffff9c7 2a:* 0f 0b ud2 <-- trapping instruction 2c: e9 0f fb ff ff jmpq 0xfffffffffffffb40 31: 66 81 fb 08 06 cmp $0x608,%bx 36: 0f 84 ca fc ff ff je 0xfffffffffffffd06 3c: 44 rex.R 3d: 8b .byte 0x8b 3e: bd .byte 0xbd ... Code starting with the faulting instruction =========================================== 0: 0f 0b ud2 2: e9 0f fb ff ff jmpq 0xfffffffffffffb16 7: 66 81 fb 08 06 cmp $0x608,%bx c: 0f 84 ca fc ff ff je 0xfffffffffffffcdc 12: 44 rex.R 13: 8b .byte 0x8b 14: bd .byte 0xbd ... 
kern :warn : [ 173.158315] RSP: 0018:ffffc900008e7e10 EFLAGS: 00010246 kern :warn : [ 173.158889] RAX: dffffc0000000000 RBX: 0000000000000008 RCX: ffffc900008e8240 kern :warn : [ 173.159631] RDX: 1ffff1103c1ac95b RSI: 0000000000000000 RDI: ffff8881e0d64ad8 kern :warn : [ 173.160365] RBP: ffffc900008e81e0 R08: 0000000000000000 R09: 0000000000000000 kern :warn : [ 173.161114] R10: ffffc900008e81f8 R11: ffffc900008e8240 R12: ffff8881e0d64ac0 kern :warn : [ 173.161871] R13: ffffffff8472b660 R14: 0000000000000000 R15: 0000000000000038 kern :warn : [ 173.162612] FS: 00007f4db0039740(0000) GS:ffff88880ea00000(0000) knlGS:0000000000000000 kern :warn : [ 173.163426] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kern :warn : [ 173.164051] CR2: 00007f4db0741f8c CR3: 000000087dbda001 CR4: 00000000003706e0 kern :warn : [ 173.164808] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kern :warn : [ 173.165549] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kern :warn : [ 173.166282] Call Trace: kern :warn : [ 173.166620] <IRQ> kern :warn : [ 173.166916] ? __warn (kernel/panic.c:673) kern :warn : [ 173.167316] ? __skb_flow_dissect (net/core/flow_dissector.c:1096 (discriminator 1)) kern :warn : [ 173.167837] ? report_bug (lib/bug.c:180 lib/bug.c:219) kern :warn : [ 173.168293] ? handle_bug (arch/x86/kernel/traps.c:324) kern :warn : [ 173.168724] ? exc_invalid_op (arch/x86/kernel/traps.c:345 (discriminator 1)) kern :warn : [ 173.169173] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568) kern :warn : [ 173.169668] ? __skb_flow_dissect (net/core/flow_dissector.c:1096 (discriminator 1)) kern :warn : [ 173.170170] ? arch_stack_walk (arch/x86/kernel/stacktrace.c:27 (discriminator 1)) kern :warn : [ 173.170643] ? bpf_flow_dissect (net/core/flow_dissector.c:1029) kern :warn : [ 173.171125] ? 
native_queued_spin_lock_slowpath (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:444 include/linux/atomic/atomic-instrumented.h:33 arch/x86/include/asm/qspinlock.h:25 kernel/locking/qspinlock.c:353) kern :warn : [ 173.171729] ? .slowpath (kernel/locking/qspinlock.c:317) kern :warn : [ 173.172143] ? lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5763 kernel/locking/lockdep.c:5726) kern :warn : [ 173.172601] ? __create_object (mm/kmemleak.c:678) kern :warn : [ 173.173076] ? set_track_prepare (mm/slub.c:3016) kern :warn : [ 173.173555] ? mark_lock (arch/x86/include/asm/bitops.h:228 (discriminator 3) arch/x86/include/asm/bitops.h:240 (discriminator 3) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 3) kernel/locking/lockdep.c:228 (discriminator 3) kernel/locking/lockdep.c:4663 (discriminator 3)) kern :warn : [ 173.173978] ? mark_lock (arch/x86/include/asm/bitops.h:228 (discriminator 3) arch/x86/include/asm/bitops.h:240 (discriminator 3) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 3) kernel/locking/lockdep.c:228 (discriminator 3) kernel/locking/lockdep.c:4663 (discriminator 3)) kern :warn : [ 173.174398] ? reacquire_held_locks (kernel/locking/lockdep.c:5412) kern :warn : [ 173.174913] ? mark_lock_irq (kernel/locking/lockdep.c:4646) kern :warn : [ 173.175373] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.175841] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.176296] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) kern :warn : [ 173.176849] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4574) kern :warn : [ 173.177468] ? rt_cache_route (net/ipv4/route.c:1485) kern :warn : [ 173.177925] ? find_held_lock (kernel/locking/lockdep.c:5251) kern :warn : [ 173.178384] ? 
__lock_release+0x111/0x440 kern :warn : [ 173.178905] ? ip_route_output_key_hash (include/linux/rcupdate.h:781 net/ipv4/route.c:2643) kern :warn : [ 173.179442] ? reacquire_held_locks (kernel/locking/lockdep.c:5412) kern :warn : [ 173.179960] ? __mkroute_output (include/net/lwtunnel.h:140 net/ipv4/route.c:2618) kern :warn : [ 173.181387] ? __xfrm_decode_session (net/xfrm/xfrm_policy.c:3451) kern :warn : [ 173.181904] __xfrm_decode_session (net/xfrm/xfrm_policy.c:3451) kern :warn : [ 173.182398] ? ip_route_output_key_hash_rcu (net/ipv4/route.c:2629) kern :warn : [ 173.182972] ? rt6_get_cookie (net/core/dst_cache.c:29) kern :warn : [ 173.183449] ip_route_me_harder (include/linux/skbuff.h:1121 net/ipv4/netfilter.c:66) kern :warn : [ 173.183941] ? nf_ip_route (net/ipv4/netfilter.c:21) kern :warn : [ 173.184369] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.184831] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.185293] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) kern :warn : [ 173.185849] ? cookie_timestamp_decode (net/ipv4/syncookies.c:48) kern :warn : [ 173.186377] ? __alloc_skb (net/core/skbuff.c:666) kern :warn : [ 173.186829] ? lock_is_held_type (kernel/locking/lockdep.c:5502 kernel/locking/lockdep.c:5832) kern :warn : [ 173.187312] synproxy_send_tcp+0x2aa/0x540 nf_synproxy_core kern :warn : [ 173.187988] synproxy_send_client_synack (net/netfilter/nf_synproxy_core.c:484) nf_synproxy_core kern :warn : [ 173.188699] ? synproxy_send_client_synack_ipv6 (net/netfilter/nf_synproxy_core.c:450) nf_synproxy_core kern :warn : [ 173.189444] ? hrtimer_nanosleep (kernel/time/hrtimer.c:337 include/linux/hrtimer.h:255 kernel/time/hrtimer.c:2099) kern :warn : [ 173.189947] ? 
ktime_get (kernel/time/timekeeping.c:379 (discriminator 4) kernel/time/timekeeping.c:389 (discriminator 4) kernel/time/timekeeping.c:848 (discriminator 4)) kern :warn : [ 173.190388] nft_synproxy_do_eval (net/netfilter/nft_synproxy.c:60 net/netfilter/nft_synproxy.c:141) nft_synproxy kern :warn : [ 173.191004] ? nft_synproxy_obj_destroy (net/netfilter/nft_synproxy.c:109) nft_synproxy kern :warn : [ 173.191653] ? mark_lock (arch/x86/include/asm/bitops.h:228 (discriminator 3) arch/x86/include/asm/bitops.h:240 (discriminator 3) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 3) kernel/locking/lockdep.c:228 (discriminator 3) kernel/locking/lockdep.c:4663 (discriminator 3)) kern :warn : [ 173.192073] ? lock_is_held_type (kernel/locking/lockdep.c:5502 kernel/locking/lockdep.c:5832) kern :warn : [ 173.192563] nft_do_chain (net/netfilter/nf_tables_core.c:290) nf_tables kern :warn : [ 173.193119] ? nft_update_chain_stats (net/netfilter/nf_tables_core.c:254) nf_tables kern :warn : [ 173.193784] ? __create_object (mm/kmemleak.c:678) kern :warn : [ 173.194274] nft_do_chain_inet (net/netfilter/nft_chain_filter.c:145) nf_tables kern :warn : [ 173.194856] ? nft_do_chain_arp (net/netfilter/nft_chain_filter.c:145) nf_tables kern :warn : [ 173.195420] ? NF_HOOK+0xca/0x2b0 kern :warn : [ 173.195924] ? lock_sync (kernel/locking/lockdep.c:5729) kern :warn : [ 173.196354] ? skb_release_data (arch/x86/include/asm/atomic.h:85 arch/x86/include/asm/atomic.h:91 include/linux/atomic/atomic-arch-fallback.h:778 include/linux/atomic/atomic-instrumented.h:290 net/core/skbuff.c:968) kern :warn : [ 173.196837] nf_hook_slow (include/linux/netfilter.h:144 net/netfilter/core.c:626) kern :warn : [ 173.197269] NF_HOOK+0x17f/0x2b0 kern :warn : [ 173.197763] ? ip_forward_finish (include/linux/netfilter.h:298) kern :warn : [ 173.198248] ? ip_route_input_slow (net/ipv4/route.c:2487) kern :warn : [ 173.198772] ? ip4_obj_hashfn (net/ipv4/ip_forward.c:66) kern :warn : [ 173.199237] ? 
tcp_v4_early_demux (net/ipv4/tcp_ipv4.c:1796) kern :warn : [ 173.199739] ? lock_is_held_type (kernel/locking/lockdep.c:5502 kernel/locking/lockdep.c:5832) kern :warn : [ 173.200221] ip_forward (net/ipv4/ip_forward.c:162) kern :warn : [ 173.200665] ? lock_is_held_type (kernel/locking/lockdep.c:5502 kernel/locking/lockdep.c:5832) kern :warn : [ 173.201143] ? __xfrm_policy_check2+0x460/0x460 kern :warn : [ 173.201747] ? ip_rcv_finish (include/linux/skbuff.h:1121 include/net/dst.h:468 net/ipv4/ip_input.c:449) kern :warn : [ 173.202205] ip_rcv (include/linux/netfilter.h:304 include/linux/netfilter.h:298 net/ipv4/ip_input.c:569) kern :warn : [ 173.202596] ? ip_local_deliver (net/ipv4/ip_input.c:562) kern :warn : [ 173.203078] ? ip_sublist_rcv (net/ipv4/ip_input.c:436) kern :warn : [ 173.203548] ? lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5763 kernel/locking/lockdep.c:5726) kern :warn : [ 173.203997] ? process_backlog (include/linux/skbuff.h:2360 include/linux/skbuff.h:2375 net/core/dev.c:5963) kern :warn : [ 173.204489] ? ip_local_deliver (net/ipv4/ip_input.c:562) kern :warn : [ 173.204967] __netif_receive_skb_one_core (net/core/dev.c:5516) kern :warn : [ 173.205527] ? __netif_receive_skb_list_core (net/core/dev.c:5516) kern :warn : [ 173.206101] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.206571] process_backlog (include/linux/rcupdate.h:778 net/core/dev.c:5966) kern :warn : [ 173.207027] __napi_poll+0xa0/0x530 kern :warn : [ 173.207546] net_rx_action (net/core/dev.c:6596 net/core/dev.c:6727) kern :warn : [ 173.207995] ? __napi_poll+0x530/0x530 kern :warn : [ 173.208530] ? reacquire_held_locks (kernel/locking/lockdep.c:5412) kern :warn : [ 173.209039] ? asym_cpu_capacity_scan (kernel/sched/clock.c:389) kern :warn : [ 173.209581] __do_softirq (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:207 include/trace/events/irq.h:142 kernel/softirq.c:554) kern :warn : [ 173.210021] ? 
__lock_text_end (kernel/softirq.c:511) kern :warn : [ 173.210471] ? irqtime_account_irq (kernel/sched/cputime.c:64) kern :warn : [ 173.210966] ? __dev_queue_xmit (include/linux/rcupdate.h:308 include/linux/rcupdate.h:817 net/core/dev.c:4367) kern :warn : [ 173.211454] do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) kern :warn : [ 173.211870] </IRQ> kern :warn : [ 173.212170] <TASK> kern :warn : [ 173.212479] __local_bh_enable_ip (kernel/softirq.c:381) kern :warn : [ 173.212965] __dev_queue_xmit (net/core/dev.c:4368) kern :warn : [ 173.213441] ? mark_lock (arch/x86/include/asm/bitops.h:228 (discriminator 3) arch/x86/include/asm/bitops.h:240 (discriminator 3) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 3) kernel/locking/lockdep.c:228 (discriminator 3) kernel/locking/lockdep.c:4663 (discriminator 3)) kern :warn : [ 173.213881] ? mark_lock_irq (kernel/locking/lockdep.c:4646) kern :warn : [ 173.214352] ? netdev_core_pick_tx (net/core/dev.c:4249) kern :warn : [ 173.214861] ? reacquire_held_locks (kernel/locking/lockdep.c:5412) kern :warn : [ 173.215369] ? lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5763 kernel/locking/lockdep.c:5726) kern :warn : [ 173.215828] ? mark_held_locks (kernel/locking/lockdep.c:4281) kern :warn : [ 173.216286] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4573) kern :warn : [ 173.216902] ? neigh_hh_output (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 arch/x86/include/asm/irqflags.h:135 include/linux/seqlock.h:104 include/linux/seqlock.h:837 include/net/neighbour.h:496) The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20230927/202309271628.27fd2187-oliver.sang@intel.com
kernel test robot <oliver.sang@intel.com> wrote: > (please refer to attached dmesg/kmsg for entire log/backtrace) > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <oliver.sang@intel.com> > | Closes: https://lore.kernel.org/oe-lkp/202309271628.27fd2187-oliver.sang@intel.com > > > kern :warn : [ 173.147140] ------------[ cut here ]------------ > kern :warn : [ 173.147759] WARNING: CPU: 12 PID: 2260 at net/core/flow_dissector.c:1096 __skb_flow_dissect (net/core/flow_dissector.c:1096 (discriminator 1)) Two options: more 'guess the right netns' in flow dissector (derive netns from skb->dst->dev), or pass struct net down to the xfrm session decode functions. I'll have a go at option 2 to see how much noise it's going to be.
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a014783f6d8a..1ca4ca5b1367 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
 	struct hlist_head *res[XFRM_POL_CAND_MAX];
 };
 
+struct xfrm_flow_keys {
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_control control;
+	union {
+		struct flow_dissector_key_ipv4_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	} addrs;
+	struct flow_dissector_key_ip ip;
+	struct flow_dissector_key_icmp icmp;
+	struct flow_dissector_key_ports ports;
+	struct flow_dissector_key_keyid gre;
+};
+
+static struct flow_dissector xfrm_session_dissector __ro_after_init;
+
 static DEFINE_SPINLOCK(xfrm_if_cb_lock);
 static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
 
@@ -3367,191 +3382,94 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
 }
 
 static void
-decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	int ihl = iph->ihl;
-	u8 *xprth = skb_network_header(skb) + ihl * 4;
 	struct flowi4 *fl4 = &fl->u.ip4;
 
 	memset(fl4, 0, sizeof(struct flowi4));
 
-	fl4->flowi4_proto = iph->protocol;
-	fl4->daddr = reverse ? iph->saddr : iph->daddr;
-	fl4->saddr = reverse ? iph->daddr : iph->saddr;
-	fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
-
-	if (!ip_is_fragment(iph)) {
-		switch (iph->protocol) {
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ports;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				ports = (__be16 *)xprth;
-
-				fl4->fl4_sport = ports[!!reverse];
-				fl4->fl4_dport = ports[!reverse];
-			}
-			break;
-		case IPPROTO_ICMP:
-			if (xprth + 2 < skb->data ||
-			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
-				u8 *icmp;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				icmp = xprth;
-
-				fl4->fl4_icmp_type = icmp[0];
-				fl4->fl4_icmp_code = icmp[1];
-			}
-			break;
-		case IPPROTO_GRE:
-			if (xprth + 12 < skb->data ||
-			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
-				__be16 *greflags;
-				__be32 *gre_hdr;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				greflags = (__be16 *)xprth;
-				gre_hdr = (__be32 *)xprth;
-
-				if (greflags[0] & GRE_KEY) {
-					if (greflags[0] & GRE_CSUM)
-						gre_hdr++;
-					fl4->fl4_gre_key = gre_hdr[1];
-				}
-			}
-			break;
-		default:
-			break;
-		}
+	if (reverse) {
+		fl4->saddr = flkeys->addrs.ipv4.dst;
+		fl4->daddr = flkeys->addrs.ipv4.src;
+		fl4->fl4_sport = flkeys->ports.dst;
+		fl4->fl4_dport = flkeys->ports.src;
+	} else {
+		fl4->saddr = flkeys->addrs.ipv4.src;
+		fl4->daddr = flkeys->addrs.ipv4.dst;
+		fl4->fl4_sport = flkeys->ports.src;
+		fl4->fl4_dport = flkeys->ports.dst;
 	}
+
+	/* Ports, ICMP type/code and the GRE key alias the same union in
+	 * struct flowi4: only store the member matching the protocol.
+	 */
+	switch (flkeys->basic.ip_proto) {
+	case IPPROTO_GRE:
+		fl4->fl4_gre_key = flkeys->gre.keyid;
+		break;
+	case IPPROTO_ICMP:
+		fl4->fl4_icmp_type = flkeys->icmp.type;
+		fl4->fl4_icmp_code = flkeys->icmp.code;
+		break;
+	}
+
+	fl4->flowi4_proto = flkeys->basic.ip_proto;
+	fl4->flowi4_tos = flkeys->ip.tos;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void
-decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
 {
 	struct flowi6 *fl6 = &fl->u.ip6;
-	int onlyproto = 0;
-	const struct ipv6hdr *hdr = ipv6_hdr(skb);
-	u32 offset = sizeof(*hdr);
-	struct ipv6_opt_hdr *exthdr;
-	const unsigned char *nh = skb_network_header(skb);
-	u16 nhoff = IP6CB(skb)->nhoff;
-	u8 nexthdr;
-
-	if (!nhoff)
-		nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-	nexthdr = nh[nhoff];
 
 	memset(fl6, 0, sizeof(struct flowi6));
 
-	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
-	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
-	while (nh + offset + sizeof(*exthdr) < skb->data ||
-	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
-		nh = skb_network_header(skb);
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
-		switch (nexthdr) {
-		case NEXTHDR_FRAGMENT:
-			onlyproto = 1;
-			fallthrough;
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_HOP:
-		case NEXTHDR_DEST:
-			offset += ipv6_optlen(exthdr);
-			nexthdr = exthdr->nexthdr;
-			break;
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (!onlyproto && (nh + offset + 4 < skb->data ||
-			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
-				__be16 *ports;
-
-				nh = skb_network_header(skb);
-				ports = (__be16 *)(nh + offset);
-				fl6->fl6_sport = ports[!!reverse];
-				fl6->fl6_dport = ports[!reverse];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-		case IPPROTO_ICMPV6:
-			if (!onlyproto && (nh + offset + 2 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
-				u8 *icmp;
-
-				nh = skb_network_header(skb);
-				icmp = (u8 *)(nh + offset);
-				fl6->fl6_icmp_type = icmp[0];
-				fl6->fl6_icmp_code = icmp[1];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-		case IPPROTO_GRE:
-			if (!onlyproto &&
-			    (nh + offset + 12 < skb->data ||
-			     pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
-				struct gre_base_hdr *gre_hdr;
-				__be32 *gre_key;
-
-				nh = skb_network_header(skb);
-				gre_hdr = (struct gre_base_hdr *)(nh + offset);
-				gre_key = (__be32 *)(gre_hdr + 1);
-
-				if (gre_hdr->flags & GRE_KEY) {
-					if (gre_hdr->flags & GRE_CSUM)
-						gre_key++;
-					fl6->fl6_gre_key = *gre_key;
-				}
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-		case IPPROTO_MH:
-			offset += ipv6_optlen(exthdr);
-			if (!onlyproto && (nh + offset + 3 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
-				struct ip6_mh *mh;
-
-				nh = skb_network_header(skb);
-				mh = (struct ip6_mh *)(nh + offset);
-				fl6->fl6_mh_type = mh->ip6mh_type;
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-#endif
-		default:
-			fl6->flowi6_proto = nexthdr;
-			return;
-		}
+	if (reverse) {
+		fl6->saddr = flkeys->addrs.ipv6.dst;
+		fl6->daddr = flkeys->addrs.ipv6.src;
+		fl6->fl6_sport = flkeys->ports.dst;
+		fl6->fl6_dport = flkeys->ports.src;
+	} else {
+		fl6->saddr = flkeys->addrs.ipv6.src;
+		fl6->daddr = flkeys->addrs.ipv6.dst;
+		fl6->fl6_sport = flkeys->ports.src;
+		fl6->fl6_dport = flkeys->ports.dst;
 	}
+
+	/* Ports, ICMPv6 type/code and the GRE key alias the same union in
+	 * struct flowi6: only store the member matching the protocol.
+	 */
+	switch (flkeys->basic.ip_proto) {
+	case IPPROTO_GRE:
+		fl6->fl6_gre_key = flkeys->gre.keyid;
+		break;
+	case IPPROTO_ICMPV6:
+		fl6->fl6_icmp_type = flkeys->icmp.type;
+		fl6->fl6_icmp_code = flkeys->icmp.code;
+		break;
+	}
+
+	fl6->flowi6_proto = flkeys->basic.ip_proto;
 }
 #endif
 
 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
+	struct xfrm_flow_keys flkeys;
+
+	memset(&flkeys, 0, sizeof(flkeys));
+
+	skb_flow_dissect(skb, &xfrm_session_dissector, &flkeys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
 	switch (family) {
 	case AF_INET:
-		decode_session4(skb, fl, reverse);
+		decode_session4(&flkeys, fl, reverse);
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		decode_session6(skb, fl, reverse);
+		decode_session6(&flkeys, fl, reverse);
 		break;
 #endif
 	default:
@@ -4253,8 +4171,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 	.exit = xfrm_net_exit,
 };
 
+static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
+	{
+		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
+		.offset = offsetof(struct xfrm_flow_keys, control),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_BASIC,
+		.offset = offsetof(struct xfrm_flow_keys, basic),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_PORTS,
+		.offset = offsetof(struct xfrm_flow_keys, ports),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+		.offset = offsetof(struct xfrm_flow_keys, gre),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IP,
+		.offset = offsetof(struct xfrm_flow_keys, ip),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_ICMP,
+		.offset = offsetof(struct xfrm_flow_keys, icmp),
+	},
+};
+
 void __init xfrm_init(void)
 {
+	skb_flow_dissector_init(&xfrm_session_dissector,
+				xfrm_flow_dissector_keys,
+				ARRAY_SIZE(xfrm_flow_dissector_keys));
+
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
 	xfrm_input_init();
xfrm needs to populate ipv4/v6 flow struct for route lookup. In the past there were several bugs in this code: 1. callers that forget to reload header pointers after xfrm_decode_session() (it may pull headers). 2. bugs in decoding where accesses past skb->data occurred. Meanwhile network core gained a packet dissector as well. This switches xfrm to the flow dissector. Changes since RFC: Drop ipv6 mobility header support, AFAIU no one uses this. Drop extraction of flowlabel, replaced code doesn't set it either. Link: https://lore.kernel.org/netdev/20230908120628.26164-3-fw@strlen.de/ Signed-off-by: Florian Westphal <fw@strlen.de> --- net/xfrm/xfrm_policy.c | 253 ++++++++++++++++------------------------- 1 file changed, 95 insertions(+), 158 deletions(-)