diff mbox series

[BlueZ,v2,3/5] sco-tester: test local and remote disconnecting simultaneously

Message ID 79e933c8dee03c77c2b05a9d90612d04ed991da2.1690907478.git.pav@iki.fi (mailing list archive)
State Accepted
Commit 80c8266ae20ae6bf6d468d0b32d9ebffc1f4d400
Headers show
Series Additional tests for ISO and hci_sync | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/CheckPatch success CheckPatch PASS
tedd_an/GitLint fail WARNING: I3 - ignore-body-lines: gitlint will be switching from using Python regex 'match' (match beginning) to 'search' (match anywhere) semantics. Please review your ignore-body-lines.regex option accordingly. To remove this warning, set general.regex-style-search=True. More details: https://jorisroovers.github.io/gitlint/configuration/#regex-style-search 6: B3 Line contains hard tab characters (\t): " [controller] > HCI Synchronous Connect Complete" 7: B3 Line contains hard tab characters (\t): " [controller] > HCI Disconnection Complete (from remote)" 8: B3 Line contains hard tab characters (\t): " [user] shutdown(sco_socket)" 9: B3 Line contains hard tab characters (\t): " [kernel] hci_conn_abort(SCO handle)" 10: B3 Line contains hard tab characters (\t): " [kernel] > HCI Create Connection Cancel" 11: B3 Line contains hard tab characters (\t): " [kernel] < HCI Synchronous Connect Complete" 12: B3 Line contains hard tab characters (\t): " [kernel] < HCI Disconnect Complete" 13: B3 Line contains hard tab characters (\t): " [controller] < HCI Create Connection Cancel" 14: B3 Line contains hard tab characters (\t): " [controller] > HCI Command Status (Create Connection Cancel)" 15: B3 Line contains hard tab characters (\t): " [kernel] < HCI Command Status (Create Connection Cancel)" 31: B2 Line has trailing whitespace: " " 35: B2 Line has trailing whitespace: " " 36: B1 Line exceeds max length (84>80): " CPU: 0 PID: 35 Comm: kworker/u3:2 Not tainted 6.5.0-rc1-00520-gf57f797eebfe #152" 37: B1 Line exceeds max length (85>80): " Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014" 64: B2 Line has trailing whitespace: " " 76: B2 Line has trailing whitespace: " " 92: B2 Line has trailing whitespace: " " 105: B2 Line has trailing whitespace: " " 110: B2 Line has trailing whitespace: " " 112: B1 Line exceeds max length (93>80): " page:ffffea00000a7800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x29e0" 119: B2 Line has trailing whitespace: " "
tedd_an/IncrementalBuild success Incremental Build PASS

Commit Message

Pauli Virtanen Aug. 1, 2023, 4:38 p.m. UTC
Demonstrate a kernel race condition when remote side disconnects at the
same time as local side tries to cancel the connection. I.e.

	[controller] > HCI Synchronous Connect Complete
	[controller] > HCI Disconnection Complete (from remote)
	[user] shutdown(sco_socket)
	[kernel] hci_conn_abort(SCO handle)
	[kernel] > HCI Create Connection Cancel
	[kernel] < HCI Synchronous Connect Complete
	[kernel] < HCI Disconnect Complete
	[controller] < HCI Create Connection Cancel
	[controller] > HCI Command Status (Create Connection Cancel)
	[kernel] < HCI Command Status (Create Connection Cancel)

and then we get BUG: KASAN: slab-use-after-free in hci_conn_failed when
hci_conn_abort tries to delete the same connection a second time.

This type of crash is probably not limited to the sequence here, but for
this one it was possible to get the timing right in the emulator.

Add a test that hits this in the emulator environment (pretty narrow
window to hit on real hardware):

eSCO Simultaneous Disconnect - Failure
---

Notes:
    v2: no change
    
    ==================================================================
    BUG: KASAN: slab-use-after-free in hci_conn_failed+0x25/0x190
    Read of size 8 at addr ffff8880029e1958 by task kworker/u3:2/35
    
    CPU: 0 PID: 35 Comm: kworker/u3:2 Not tainted 6.5.0-rc1-00520-gf57f797eebfe #152
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
    Workqueue: hci0 hci_cmd_sync_work
    Call Trace:
     <TASK>
     print_report+0xce/0x620
     ? __virt_addr_valid+0xd8/0x160
     ? hci_conn_failed+0x25/0x190
     kasan_report+0xd5/0x110
     ? hci_conn_failed+0x25/0x190
     hci_conn_failed+0x25/0x190
     hci_abort_conn_sync+0x23b/0x370
     ? __pfx_hci_abort_conn_sync+0x10/0x10
     ? __pfx_lock_acquire+0x10/0x10
     ? __pfx_abort_conn_sync+0x10/0x10
     ? __pfx_abort_conn_sync+0x10/0x10
     hci_cmd_sync_work+0x125/0x200
     process_one_work+0x4ee/0x8f0
     ? __pfx_process_one_work+0x10/0x10
     ? __kthread_parkme+0x5f/0xe0
     ? mark_held_locks+0x1a/0x90
     worker_thread+0x8c/0x630
     ? __kthread_parkme+0xc5/0xe0
     ? __pfx_worker_thread+0x10/0x10
     kthread+0x17c/0x1c0
     ? __pfx_kthread+0x10/0x10
     ret_from_fork+0x2b/0x50
     </TASK>
    
    Allocated by task 31:
     kasan_save_stack+0x33/0x60
     kasan_set_track+0x24/0x30
     __kasan_kmalloc+0x8f/0xa0
     hci_conn_add+0xa8/0xad0
     hci_connect_sco+0x1cf/0x6e0
     sco_sock_connect+0x1a2/0x600
     __sys_connect+0x1a2/0x1d0
     __x64_sys_connect+0x3b/0x50
     do_syscall_64+0x47/0x90
     entry_SYSCALL_64_after_hwframe+0x6c/0xd6
    
    Freed by task 32:
     kasan_save_stack+0x33/0x60
     kasan_set_track+0x24/0x30
     kasan_save_free_info+0x2b/0x50
     __kasan_slab_free+0xfa/0x150
     __kmem_cache_free+0xab/0x200
     device_release+0x58/0xf0
     kobject_put+0xee/0x310
     hci_disconn_complete_evt+0x276/0x3a0
     hci_event_packet+0x54b/0x800
     hci_rx_work+0x2a4/0xae0
     process_one_work+0x4ee/0x8f0
     worker_thread+0x8c/0x630
     kthread+0x17c/0x1c0
     ret_from_fork+0x2b/0x50
    
    Last potentially related work creation:
     kasan_save_stack+0x33/0x60
     __kasan_record_aux_stack+0x94/0xa0
     insert_work+0x2d/0x150
     __queue_work+0x2f1/0x610
     queue_delayed_work_on+0x88/0x90
     sco_chan_del+0x117/0x230
     sco_sock_shutdown+0x109/0x230
     __sys_shutdown+0xb4/0x130
     __x64_sys_shutdown+0x29/0x40
     do_syscall_64+0x47/0x90
     entry_SYSCALL_64_after_hwframe+0x6c/0xd6
    
    The buggy address belongs to the object at ffff8880029e1000
     which belongs to the cache kmalloc-4k of size 4096
    The buggy address is located 2392 bytes inside of
     freed 4096-byte region [ffff8880029e1000, ffff8880029e2000)
    
    The buggy address belongs to the physical page:
    page:ffffea00000a7800 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x29e0
    head:ffffea00000a7800 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
    flags: 0x4000000000010200(slab|head|zone=1)
    page_type: 0xffffffff()
    raw: 4000000000010200 ffff8880010424c0 ffffea0000063010 ffffea00000a8610
    raw: 0000000000000000 0000000000020002 00000001ffffffff 0000000000000000
    page dumped because: kasan: bad access detected
    
    Memory state around the buggy address:
     ffff8880029e1800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
     ffff8880029e1880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
    >ffff8880029e1900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                        ^
     ffff8880029e1980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
     ffff8880029e1a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
    ==================================================================

 tools/sco-tester.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
diff mbox series

Patch

diff --git a/tools/sco-tester.c b/tools/sco-tester.c
index 44606328a..f3de42c7b 100644
--- a/tools/sco-tester.c
+++ b/tools/sco-tester.c
@@ -29,6 +29,7 @@ 
 
 #include "src/shared/tester.h"
 #include "src/shared/mgmt.h"
+#include "src/shared/util.h"
 
 struct test_data {
 	const void *test_data;
@@ -37,6 +38,7 @@  struct test_data {
 	struct hciemu *hciemu;
 	enum hciemu_type hciemu_type;
 	unsigned int io_id;
+	int sk;
 	bool disable_esco;
 	bool enable_codecs;
 };
@@ -225,6 +227,7 @@  static void test_data_free(void *test_data)
 			break; \
 		user->hciemu_type = HCIEMU_TYPE_BREDRLE; \
 		user->io_id = 0; \
+		user->sk = -1; \
 		user->test_data = data; \
 		user->disable_esco = _disable_esco; \
 		user->enable_codecs = _enable_codecs; \
@@ -250,6 +253,10 @@  static const struct sco_client_data connect_failure = {
 	.expect_err = EOPNOTSUPP
 };
 
+static const struct sco_client_data connect_failure_reset = {
+	.expect_err = ECONNRESET
+};
+
 const uint8_t data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
 
 static const struct sco_client_data connect_send_success = {
@@ -650,6 +657,8 @@  static void test_connect(const void *test_data)
 		return;
 	}
 
+	data->sk = sk;
+
 	io = g_io_channel_unix_new(sk);
 	g_io_channel_set_close_on_unref(io, TRUE);
 
@@ -745,6 +754,52 @@  static void test_connect_offload_msbc(const void *test_data)
 end:
 	close(sk);
 }
+
+static bool hook_simult_disc(const void *msg, uint16_t len, void *user_data)
+{
+	const struct bt_hci_evt_sync_conn_complete *ev = msg;
+	struct test_data *data = tester_get_data();
+	struct bthost *bthost;
+
+	tester_print("Simultaneous disconnect");
+
+	if (len != sizeof(struct bt_hci_evt_sync_conn_complete)) {
+		tester_test_failed();
+		return true;
+	}
+
+	/* Disconnect from local and remote sides at the same time */
+	bthost = hciemu_client_get_host(data->hciemu);
+	bthost_hci_disconnect(bthost, le16_to_cpu(ev->handle), 0x13);
+
+	shutdown(data->sk, SHUT_RDWR);
+
+	return true;
+}
+
+static bool hook_delay_cmd(const void *data, uint16_t len, void *user_data)
+{
+	tester_print("Delaying emulator response...");
+	g_usleep(250000);
+	tester_print("Delaying emulator response... Done.");
+	return true;
+}
+
+static void test_connect_simult_disc(const void *test_data)
+{
+	struct test_data *data = tester_get_data();
+
+	/* Kernel shall not crash, but <= 6.5-rc1 crash */
+	hciemu_add_hook(data->hciemu, HCIEMU_HOOK_POST_EVT,
+					BT_HCI_EVT_SYNC_CONN_COMPLETE,
+					hook_simult_disc, NULL);
+	hciemu_add_hook(data->hciemu, HCIEMU_HOOK_PRE_CMD,
+					BT_HCI_CMD_CREATE_CONN_CANCEL,
+					hook_delay_cmd, NULL);
+
+	test_connect(test_data);
+}
+
 int main(int argc, char *argv[])
 {
 	tester_init(&argc, &argv);
@@ -767,6 +822,10 @@  int main(int argc, char *argv[])
 	test_sco("eSCO mSBC - Success", &connect_success, setup_powered,
 							test_connect_transp);
 
+	test_sco("eSCO Simultaneous Disconnect - Failure",
+					&connect_failure_reset, setup_powered,
+					test_connect_simult_disc);
+
 	test_sco_11("SCO CVSD 1.1 - Success", &connect_success, setup_powered,
 							test_connect);