diff mbox series

[bpf-next] bpf: Address KCSAN report on bpf_lru_list

Message ID 20230511043748.1384166-1-martin.lau@linux.dev (mailing list archive)
State Accepted
Commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Address KCSAN report on bpf_lru_list | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers warning 7 maintainers not CCed: yhs@fb.com kpsingh@kernel.org john.fastabend@gmail.com song@kernel.org sdf@google.com jolsa@kernel.org haoluo@google.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch warning WARNING: Possible repeated word: 'Google'
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for veristat

Commit Message

Martin KaFai Lau May 11, 2023, 4:37 a.m. UTC
From: Martin KaFai Lau <martin.lau@kernel.org>

KCSAN reported a data-race when accessing node->ref.
Although node->ref does not have to be accurate,
take this chance to use a more common READ_ONCE() and WRITE_ONCE()
pattern instead of data_race().

There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref().
This patch also adds bpf_lru_node_clear_ref() to do the
WRITE_ONCE(node->ref, 0) also.

==================================================================
BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem

write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
__sys_bpf+0x338/0x810
__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
__sys_bpf+0x338/0x810
__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0x01 -> 0x00

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
==================================================================

Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/bpf_lru_list.c | 21 +++++++++++++--------
 kernel/bpf/bpf_lru_list.h |  7 ++-----
 2 files changed, 15 insertions(+), 13 deletions(-)

Comments

Yonghong Song May 12, 2023, 2:24 p.m. UTC | #1
On 5/10/23 9:37 PM, Martin KaFai Lau wrote:
> From: Martin KaFai Lau <martin.lau@kernel.org>
> 
> KCSAN reported a data-race when accessing node->ref.
> Although node->ref does not have to be accurate,
> take this chance to use a more common READ_ONCE() and WRITE_ONCE()
> pattern instead of data_race().
> 
> There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref().
> This patch also adds bpf_lru_node_clear_ref() to do the
> WRITE_ONCE(node->ref, 0) also.
> 
> ==================================================================
> BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem
> 
> write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
> __bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
> __bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
> __bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
> bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
> bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
> bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
> prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
> __htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
> bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
> bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
> generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
> bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
> __sys_bpf+0x338/0x810
> __do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
> __se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
> __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
> bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
> __htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
> bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
> bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
> generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
> bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
> __sys_bpf+0x338/0x810
> __do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
> __se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
> __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> value changed: 0x01 -> 0x00
> 
> Reported by Kernel Concurrency Sanitizer on:
> CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
> ==================================================================
> 
> Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>

Acked-by: Yonghong Song <yhs@fb.com>
patchwork-bot+netdevbpf@kernel.org May 12, 2023, 7:10 p.m. UTC | #2
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Wed, 10 May 2023 21:37:48 -0700 you wrote:
> From: Martin KaFai Lau <martin.lau@kernel.org>
> 
> KCSAN reported a data-race when accessing node->ref.
> Although node->ref does not have to be accurate,
> take this chance to use a more common READ_ONCE() and WRITE_ONCE()
> pattern instead of data_race().
> 
> [...]

Here is the summary with links:
  - [bpf-next] bpf: Address KCSAN report on bpf_lru_list
    https://git.kernel.org/bpf/bpf-next/c/ee9fd0ac3017

You are awesome, thank you!
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index d99e89f113c4..3dabdd137d10 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -41,7 +41,12 @@  static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
 /* bpf_lru_node helpers */
 static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
 {
-	return node->ref;
+	return READ_ONCE(node->ref);
+}
+
+static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
+{
+	WRITE_ONCE(node->ref, 0);
 }
 
 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
@@ -89,7 +94,7 @@  static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
 
 	bpf_lru_list_count_inc(l, tgt_type);
 	node->type = tgt_type;
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 	list_move(&node->list, &l->lists[tgt_type]);
 }
 
@@ -110,7 +115,7 @@  static void __bpf_lru_node_move(struct bpf_lru_list *l,
 		bpf_lru_list_count_inc(l, tgt_type);
 		node->type = tgt_type;
 	}
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 
 	/* If the moving node is the next_inactive_rotation candidate,
 	 * move the next_inactive_rotation pointer also.
@@ -353,7 +358,7 @@  static void __local_list_add_pending(struct bpf_lru *lru,
 	*(u32 *)((void *)node + lru->hash_offset) = hash;
 	node->cpu = cpu;
 	node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
-	node->ref = 0;
+	bpf_lru_node_clear_ref(node);
 	list_add(&node->list, local_pending_list(loc_l));
 }
 
@@ -419,7 +424,7 @@  static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
 	if (!list_empty(free_list)) {
 		node = list_first_entry(free_list, struct bpf_lru_node, list);
 		*(u32 *)((void *)node + lru->hash_offset) = hash;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
 	}
 
@@ -522,7 +527,7 @@  static void bpf_common_lru_push_free(struct bpf_lru *lru,
 		}
 
 		node->type = BPF_LRU_LOCAL_LIST_T_FREE;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		list_move(&node->list, local_free_list(loc_l));
 
 		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
@@ -568,7 +573,7 @@  static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
 
 		node = (struct bpf_lru_node *)(buf + node_offset);
 		node->type = BPF_LRU_LIST_T_FREE;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 		buf += elem_size;
 	}
@@ -594,7 +599,7 @@  static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
 		node = (struct bpf_lru_node *)(buf + node_offset);
 		node->cpu = cpu;
 		node->type = BPF_LRU_LIST_T_FREE;
-		node->ref = 0;
+		bpf_lru_node_clear_ref(node);
 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
 		i++;
 		buf += elem_size;
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 4ea227c9c1ad..8f3c8b2b4490 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -64,11 +64,8 @@  struct bpf_lru {
 
 static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 {
-	/* ref is an approximation on access frequency.  It does not
-	 * have to be very accurate.  Hence, no protection is used.
-	 */
-	if (!node->ref)
-		node->ref = 1;
+	if (!READ_ONCE(node->ref))
+		WRITE_ONCE(node->ref, 1);
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,