diff mbox series

[RFC] ceph: Fix kernel crash in generic/397 test

Message ID 266c50167604c606c95a6efe575de5430c31168b.camel@ibm.com (mailing list archive)
State New
Headers show
Series [RFC] ceph: Fix kernel crash in generic/397 test | expand

Commit Message

Viacheslav Dubeyko Jan. 15, 2025, 12:49 a.m. UTC
Hello,

The generic/397 test generate kernel crash for the case of
encrypted inode with unaligned file size (for example, 33K
or 1K):

Jan 3 12:34:40 ceph-testing-0001 root: run xfstest generic/397
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.737811] run fstests
generic/397 at 2025-01-03 12:34:40
Jan 3 12:34:40 ceph-testing-0001 systemd1: Started /usr/bin/bash c test
-w /proc/self/oom_score_adj && echo 250 > /proc/self/oom_score_adj;
exec ./tests/generic/397.
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.875761] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.876130] libceph:
client4614 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.991965] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.992334] libceph:
client4617 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017234] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017594] libceph:
client4620 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.031394] xfs_io (pid
18988) is setting deprecated v1 encryption policy; recommend upgrading
to v2.
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054528] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054892] libceph:
client4623 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070287] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070704] libceph:
client4626 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.264586] libceph: mon0
(2)127.0.0.1:40674 session established
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.265258] libceph:
client4629 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374578] -----------[ cut
here ]------------
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374586] kernel BUG at
net/ceph/messenger.c:1070!
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.375150] Oops: invalid
opcode: 0000 [#1] PREEMPT SMP NOPTI
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378145] CPU: 2 UID: 0
PID: 4759 Comm: kworker/2:9 Not tainted 6.13.0-rc5+ #1
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378969] Hardware name:
QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-
ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.380167] Workqueue: ceph-
msgr ceph_con_workfn
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.381639] RIP:
0010:ceph_msg_data_cursor_init+0x42/0x50
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.382152] Code: 89 17 48
8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
84 00 00 00 00 00 90 90 90 90 90 90 90 90
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.383928] RSP:
0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.384447] RAX:
ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385129] RDX:
0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385839] RBP:
ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.386539] R10:
0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387203] R13:
ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387877] FS:
0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.388663] CS: 0010 DS:
0000 ES: 0000 CR0: 0000000080050033
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389212] CR2:
00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389921] DR0:
0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.390620] DR3:
0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391307] PKRU: 55555554
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391567] Call Trace:
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391807] <TASK>
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392021] ?
show_regs+0x71/0x90
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392391] ? die+0x38/0xa0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392667] ?
do_trap+0xdb/0x100
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392981] ?
do_error_trap+0x75/0xb0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393372] ?
ceph_msg_data_cursor_init+0x42/0x50
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393842] ?
exc_invalid_op+0x53/0x80
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394232] ?
ceph_msg_data_cursor_init+0x42/0x50
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394694] ?
asm_exc_invalid_op+0x1b/0x20
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395099] ?
ceph_msg_data_cursor_init+0x42/0x50
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395583] ?
ceph_con_v2_try_read+0xd16/0x2220
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396027] ?
_raw_spin_unlock+0xe/0x40
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396428] ?
raw_spin_rq_unlock+0x10/0x40
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396842] ?
finish_task_switch.isra.0+0x97/0x310
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397338] ?
__schedule+0x44b/0x16b0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397738]
ceph_con_workfn+0x326/0x750
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398121]
process_one_work+0x188/0x3d0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398522] ?
__pfx_worker_thread+0x10/0x10
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398929]
worker_thread+0x2b5/0x3c0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399310] ?
__pfx_worker_thread+0x10/0x10
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399727]
kthread+0xe1/0x120
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400031] ?
__pfx_kthread+0x10/0x10
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400431]
ret_from_fork+0x43/0x70
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400771] ?
__pfx_kthread+0x10/0x10
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401127]
ret_from_fork_asm+0x1a/0x30
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401543] </TASK>
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401760] Modules linked
in: hctr2 nhpoly1305_avx2 nhpoly1305_sse2 nhpoly1305 chacha_generic
chacha_x86_64 libchacha adiantum libpoly1305 essiv authenc mptcp_diag
xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag
netlink_diag intel_rapl_msr intel_rapl_common
intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm
crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic
ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel joydev
crypto_simd cryptd rapl input_leds psmouse sch_fq_codel serio_raw bochs
i2c_piix4 floppy qemu_fw_cfg i2c_smbus mac_hid pata_acpi msr parport_pc
ppdev lp parport efi_pstore ip_tables x_tables
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407319] ---[ end trace
0000000000000000 ]---
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407775] RIP:
0010:ceph_msg_data_cursor_init+0x42/0x50
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.408317] Code: 89 17 48
8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
84 00 00 00 00 00 90 90 90 90 90 90 90 90
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410087] RSP:
0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410609] RAX:
ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.411318] RDX:
0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412014] RBP:
ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412735] R10:
0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.413438] R13:
ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414121] FS:
0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414935] CS: 0010 DS:
0000 ES: 0000 CR0: 0000000080050033
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.415516] CR2:
00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416211] DR0:
0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416907] DR3:
0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.417630] PKRU: 55555554

BUG_ON(length > msg->data_length) triggers the issue:

(gdb) l *ceph_msg_data_cursor_init+0x42
0xffffffff823b45a2 is in ceph_msg_data_cursor_init
(net/ceph/messenger.c:1070).
1065
1066 void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor
*cursor,
1067 struct ceph_msg *msg, size_t length)
1068 {
1069 BUG_ON(!length);
1070 BUG_ON(length > msg->data_length);
1071 BUG_ON(!msg->num_data_items);
1072
1073 cursor->total_resid = length;
1074 cursor->data = msg->data;

The issue takes place because of this:
Jan 6 14:59:24 ceph-testing-0001 kernel: [ 202.628853] libceph: pid
144:net/ceph/messenger_v2.c:2034 prepare_sparse_read_data(): msg-
>data_length 33792, msg->sparse_read_total 36864
1070 BUG_ON(length > msg->data_length);
msg->sparse_read_total 36864 > msg->data_length 33792

The generic/397 test (xfstests) executes such steps:
(1) create encrypted files and directories;
(2) access the created files and folders with encryption key;
(3) access the created files and folders without encryption key.

We have issue because there is alignment of length before calling
ceph_osdc_new_request():

Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.522120] ceph: pid
8931:fs/ceph/crypto.h:148 ceph_fscrypt_adjust_off_and_len(): ino
0x100000009d5: encrypted 0x4000, len 33792
Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.523706] ceph: pid
8931:fs/ceph/crypto.h:155 ceph_fscrypt_adjust_off_and_len(): ino
0x100000009d5: encrypted 0x4000, len 36864

This patch uses unaligned size for retrieving file's content
from OSD and, then, aligned the size of sparse extent in the
case if inode is encrypted.

./check generic/397
FSTYP         -- ceph
PLATFORM      -- Linux/x86_64 ceph-testing-0001 6.13.0-rc7+ #58 SMP
PREEMPT_DYNAMIC Wed Jan 15 00:07:06 UTC 2025
MKFS_OPTIONS  -- 127.0.0.1:40629:/scratch
MOUNT_OPTIONS -- -o
name=fs,secret=<hidden>,ms_mode=crc,nowsync,copyfrom
127.0.0.1:<port>:/scratch /mnt/scratch

generic/397 1s ...  1s
Ran: generic/397
Passed all 1 tests

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
---
 fs/ceph/addr.c | 30 ++++++++++++++++++++++++------
 fs/ceph/file.c | 20 +++++++++++++++++---
 2 files changed, 41 insertions(+), 9 deletions(-)

+		 */
+
 		req = ceph_osdc_new_request(osdc, &ci->i_layout,
 					ci->i_vino, read_off,
&read_len, 0, 1,
 					sparse ?
CEPH_OSD_OP_SPARSE_READ :
@@ -1111,6 +1114,9 @@ ssize_t __ceph_sync_read(struct inode *inode,
loff_t *ki_pos,
 			break;
 		}
 
+		/* determine new offset/length if encrypted */
+		ceph_fscrypt_adjust_off_and_len(inode, &read_off,
&read_len);
+
 		/* adjust len downward if the request truncated the
len */
 		if (off + len > read_off + read_len)
 			len = read_off + read_len - off;
@@ -1168,7 +1174,15 @@ ssize_t __ceph_sync_read(struct inode *inode,
loff_t *ki_pos,
 		}
 
 		if (IS_ENCRYPTED(inode)) {
+			struct ceph_sparse_extent *ext;
 			int fret;
+			int i;
+
+			for (i = 0; i < op->extent.sparse_ext_cnt;
i++) {
+				ext = &op->extent.sparse_ext[i];
+				ceph_fscrypt_adjust_off_and_len(inode,
+						&ext->off, &ext->len);
+			}
 
 			fret = ceph_fscrypt_decrypt_extents(inode,
pages,
 					read_off, op-
>extent.sparse_ext,

Comments

Ilya Dryomov Jan. 16, 2025, 9:23 a.m. UTC | #1
On Wed, Jan 15, 2025 at 1:49 AM Viacheslav Dubeyko
<Slava.Dubeyko@ibm.com> wrote:
>
> Hello,
>
> The generic/397 test generate kernel crash for the case of
> encrypted inode with unaligned file size (for example, 33K
> or 1K):
>
> Jan 3 12:34:40 ceph-testing-0001 root: run xfstest generic/397
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.737811] run fstests
> generic/397 at 2025-01-03 12:34:40
> Jan 3 12:34:40 ceph-testing-0001 systemd1: Started /usr/bin/bash c test
> -w /proc/self/oom_score_adj && echo 250 > /proc/self/oom_score_adj;
> exec ./tests/generic/397.
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.875761] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.876130] libceph:
> client4614 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.991965] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.992334] libceph:
> client4617 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017234] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017594] libceph:
> client4620 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.031394] xfs_io (pid
> 18988) is setting deprecated v1 encryption policy; recommend upgrading
> to v2.
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054528] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054892] libceph:
> client4623 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070287] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070704] libceph:
> client4626 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.264586] libceph: mon0
> (2)127.0.0.1:40674 session established
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.265258] libceph:
> client4629 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374578] -----------[ cut
> here ]------------
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374586] kernel BUG at
> net/ceph/messenger.c:1070!
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.375150] Oops: invalid
> opcode: 0000 [#1] PREEMPT SMP NOPTI
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378145] CPU: 2 UID: 0
> PID: 4759 Comm: kworker/2:9 Not tainted 6.13.0-rc5+ #1
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378969] Hardware name:
> QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-
> ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.380167] Workqueue: ceph-
> msgr ceph_con_workfn
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.381639] RIP:
> 0010:ceph_msg_data_cursor_init+0x42/0x50
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.382152] Code: 89 17 48
> 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
> 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
> 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.383928] RSP:
> 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.384447] RAX:
> ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385129] RDX:
> 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385839] RBP:
> ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.386539] R10:
> 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387203] R13:
> ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387877] FS:
> 0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.388663] CS: 0010 DS:
> 0000 ES: 0000 CR0: 0000000080050033
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389212] CR2:
> 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389921] DR0:
> 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.390620] DR3:
> 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391307] PKRU: 55555554
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391567] Call Trace:
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391807] <TASK>
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392021] ?
> show_regs+0x71/0x90
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392391] ? die+0x38/0xa0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392667] ?
> do_trap+0xdb/0x100
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392981] ?
> do_error_trap+0x75/0xb0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393372] ?
> ceph_msg_data_cursor_init+0x42/0x50
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393842] ?
> exc_invalid_op+0x53/0x80
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394232] ?
> ceph_msg_data_cursor_init+0x42/0x50
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394694] ?
> asm_exc_invalid_op+0x1b/0x20
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395099] ?
> ceph_msg_data_cursor_init+0x42/0x50
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395583] ?
> ceph_con_v2_try_read+0xd16/0x2220
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396027] ?
> _raw_spin_unlock+0xe/0x40
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396428] ?
> raw_spin_rq_unlock+0x10/0x40
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396842] ?
> finish_task_switch.isra.0+0x97/0x310
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397338] ?
> __schedule+0x44b/0x16b0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397738]
> ceph_con_workfn+0x326/0x750
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398121]
> process_one_work+0x188/0x3d0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398522] ?
> __pfx_worker_thread+0x10/0x10
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398929]
> worker_thread+0x2b5/0x3c0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399310] ?
> __pfx_worker_thread+0x10/0x10
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399727]
> kthread+0xe1/0x120
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400031] ?
> __pfx_kthread+0x10/0x10
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400431]
> ret_from_fork+0x43/0x70
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400771] ?
> __pfx_kthread+0x10/0x10
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401127]
> ret_from_fork_asm+0x1a/0x30
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401543] </TASK>
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401760] Modules linked
> in: hctr2 nhpoly1305_avx2 nhpoly1305_sse2 nhpoly1305 chacha_generic
> chacha_x86_64 libchacha adiantum libpoly1305 essiv authenc mptcp_diag
> xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag
> netlink_diag intel_rapl_msr intel_rapl_common
> intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm
> crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic
> ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel joydev
> crypto_simd cryptd rapl input_leds psmouse sch_fq_codel serio_raw bochs
> i2c_piix4 floppy qemu_fw_cfg i2c_smbus mac_hid pata_acpi msr parport_pc
> ppdev lp parport efi_pstore ip_tables x_tables
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407319] ---[ end trace
> 0000000000000000 ]---
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407775] RIP:
> 0010:ceph_msg_data_cursor_init+0x42/0x50
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.408317] Code: 89 17 48
> 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
> 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
> 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410087] RSP:
> 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410609] RAX:
> ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.411318] RDX:
> 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412014] RBP:
> ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412735] R10:
> 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.413438] R13:
> ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414121] FS:
> 0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414935] CS: 0010 DS:
> 0000 ES: 0000 CR0: 0000000080050033
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.415516] CR2:
> 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416211] DR0:
> 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416907] DR3:
> 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.417630] PKRU: 55555554
>
> BUG_ON(length > msg->data_length) triggers the issue:
>
> (gdb) l *ceph_msg_data_cursor_init+0x42
> 0xffffffff823b45a2 is in ceph_msg_data_cursor_init
> (net/ceph/messenger.c:1070).
> 1065
> 1066 void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor
> *cursor,
> 1067 struct ceph_msg *msg, size_t length)
> 1068 {
> 1069 BUG_ON(!length);
> 1070 BUG_ON(length > msg->data_length);
> 1071 BUG_ON(!msg->num_data_items);
> 1072
> 1073 cursor->total_resid = length;
> 1074 cursor->data = msg->data;
>
> The issue takes place because of this:
> Jan 6 14:59:24 ceph-testing-0001 kernel: [ 202.628853] libceph: pid
> 144:net/ceph/messenger_v2.c:2034 prepare_sparse_read_data(): msg-
> >data_length 33792, msg->sparse_read_total 36864
> 1070 BUG_ON(length > msg->data_length);
> msg->sparse_read_total 36864 > msg->data_length 33792
>
> The generic/397 test (xfstests) executes such steps:
> (1) create encrypted files and directories;
> (2) access the created files and folders with encryption key;
> (3) access the created files and folders without encryption key.
>
> We have issue because there is alignment of length before calling
> ceph_osdc_new_request():
>
> Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.522120] ceph: pid
> 8931:fs/ceph/crypto.h:148 ceph_fscrypt_adjust_off_and_len(): ino
> 0x100000009d5: encrypted 0x4000, len 33792
> Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.523706] ceph: pid
> 8931:fs/ceph/crypto.h:155 ceph_fscrypt_adjust_off_and_len(): ino
> 0x100000009d5: encrypted 0x4000, len 36864
>
> This patch uses unaligned size for retrieving file's content
> from OSD and, then, aligned the size of sparse extent in the
> case if inode is encrypted.

Hi Slava,

This doesn't seem right to me because in the case where a file is
accessed with encryption key the whole block needs to be read from the
OSD, or decryption wouldn't work.  Do tests where fscrypt is fully
engaged (i.e. encryption key and everything else is present) and
encrypted file contents are checked pass with this patch?

BTW this and other recent patches sent from Slava.Dubeyko@ibm.com
appear to be mangled and don't apply:

$ git am --show-current-patch=diff
---
 fs/ceph/addr.c | 30 ++++++++++++++++++++++++------
 fs/ceph/file.c | 20 +++++++++++++++++---
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 85936f6d2bf7..5a7a698ad8e8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -235,6 +235,15 @@ static void finish_netfs_read(struct
ceph_osd_request *req)
                    subreq->rreq->origin != NETFS_DIO_READ)
                        __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq-
>flags);

The diff hunk header line, __set_bit() line and many more are wrapped.

Thanks,

                Ilya
Ilya Dryomov Jan. 16, 2025, 4:03 p.m. UTC | #2
On Thu, Jan 16, 2025 at 10:23 AM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> On Wed, Jan 15, 2025 at 1:49 AM Viacheslav Dubeyko
> <Slava.Dubeyko@ibm.com> wrote:
> >
> > Hello,
> >
> > The generic/397 test generate kernel crash for the case of
> > encrypted inode with unaligned file size (for example, 33K
> > or 1K):
> >
> > Jan 3 12:34:40 ceph-testing-0001 root: run xfstest generic/397
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.737811] run fstests
> > generic/397 at 2025-01-03 12:34:40
> > Jan 3 12:34:40 ceph-testing-0001 systemd1: Started /usr/bin/bash c test
> > -w /proc/self/oom_score_adj && echo 250 > /proc/self/oom_score_adj;
> > exec ./tests/generic/397.
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.875761] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.876130] libceph:
> > client4614 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.991965] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.992334] libceph:
> > client4617 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017234] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017594] libceph:
> > client4620 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.031394] xfs_io (pid
> > 18988) is setting deprecated v1 encryption policy; recommend upgrading
> > to v2.
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054528] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054892] libceph:
> > client4623 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070287] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070704] libceph:
> > client4626 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.264586] libceph: mon0
> > (2)127.0.0.1:40674 session established
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.265258] libceph:
> > client4629 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374578] -----------[ cut
> > here ]------------
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374586] kernel BUG at
> > net/ceph/messenger.c:1070!
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.375150] Oops: invalid
> > opcode: 0000 [#1] PREEMPT SMP NOPTI
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378145] CPU: 2 UID: 0
> > PID: 4759 Comm: kworker/2:9 Not tainted 6.13.0-rc5+ #1
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378969] Hardware name:
> > QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-
> > ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.380167] Workqueue: ceph-
> > msgr ceph_con_workfn
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.381639] RIP:
> > 0010:ceph_msg_data_cursor_init+0x42/0x50
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.382152] Code: 89 17 48
> > 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
> > 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
> > 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.383928] RSP:
> > 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.384447] RAX:
> > ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385129] RDX:
> > 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385839] RBP:
> > ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.386539] R10:
> > 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387203] R13:
> > ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387877] FS:
> > 0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.388663] CS: 0010 DS:
> > 0000 ES: 0000 CR0: 0000000080050033
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389212] CR2:
> > 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389921] DR0:
> > 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.390620] DR3:
> > 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391307] PKRU: 55555554
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391567] Call Trace:
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391807] <TASK>
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392021] ?
> > show_regs+0x71/0x90
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392391] ? die+0x38/0xa0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392667] ?
> > do_trap+0xdb/0x100
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392981] ?
> > do_error_trap+0x75/0xb0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393372] ?
> > ceph_msg_data_cursor_init+0x42/0x50
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393842] ?
> > exc_invalid_op+0x53/0x80
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394232] ?
> > ceph_msg_data_cursor_init+0x42/0x50
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394694] ?
> > asm_exc_invalid_op+0x1b/0x20
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395099] ?
> > ceph_msg_data_cursor_init+0x42/0x50
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395583] ?
> > ceph_con_v2_try_read+0xd16/0x2220
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396027] ?
> > _raw_spin_unlock+0xe/0x40
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396428] ?
> > raw_spin_rq_unlock+0x10/0x40
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396842] ?
> > finish_task_switch.isra.0+0x97/0x310
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397338] ?
> > __schedule+0x44b/0x16b0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397738]
> > ceph_con_workfn+0x326/0x750
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398121]
> > process_one_work+0x188/0x3d0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398522] ?
> > __pfx_worker_thread+0x10/0x10
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398929]
> > worker_thread+0x2b5/0x3c0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399310] ?
> > __pfx_worker_thread+0x10/0x10
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399727]
> > kthread+0xe1/0x120
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400031] ?
> > __pfx_kthread+0x10/0x10
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400431]
> > ret_from_fork+0x43/0x70
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400771] ?
> > __pfx_kthread+0x10/0x10
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401127]
> > ret_from_fork_asm+0x1a/0x30
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401543] </TASK>
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401760] Modules linked
> > in: hctr2 nhpoly1305_avx2 nhpoly1305_sse2 nhpoly1305 chacha_generic
> > chacha_x86_64 libchacha adiantum libpoly1305 essiv authenc mptcp_diag
> > xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag
> > netlink_diag intel_rapl_msr intel_rapl_common
> > intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm
> > crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic
> > ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel joydev
> > crypto_simd cryptd rapl input_leds psmouse sch_fq_codel serio_raw bochs
> > i2c_piix4 floppy qemu_fw_cfg i2c_smbus mac_hid pata_acpi msr parport_pc
> > ppdev lp parport efi_pstore ip_tables x_tables
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407319] ---[ end trace
> > 0000000000000000 ]---
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407775] RIP:
> > 0010:ceph_msg_data_cursor_init+0x42/0x50
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.408317] Code: 89 17 48
> > 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff ff 5d
> > 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e 0f 1f
> > 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410087] RSP:
> > 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410609] RAX:
> > ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.411318] RDX:
> > 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412014] RBP:
> > ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412735] R10:
> > 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.413438] R13:
> > ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414121] FS:
> > 0000000000000000(0000) GS:ffff9814b7900000(0000) knlGS:0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414935] CS: 0010 DS:
> > 0000 ES: 0000 CR0: 0000000080050033
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.415516] CR2:
> > 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416211] DR0:
> > 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416907] DR3:
> > 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.417630] PKRU: 55555554
> >
> > BUG_ON(length > msg->data_length) triggers the issue:
> >
> > (gdb) l *ceph_msg_data_cursor_init+0x42
> > 0xffffffff823b45a2 is in ceph_msg_data_cursor_init
> > (net/ceph/messenger.c:1070).
> > 1065
> > 1066 void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor
> > *cursor,
> > 1067 struct ceph_msg *msg, size_t length)
> > 1068 {
> > 1069 BUG_ON(!length);
> > 1070 BUG_ON(length > msg->data_length);
> > 1071 BUG_ON(!msg->num_data_items);
> > 1072
> > 1073 cursor->total_resid = length;
> > 1074 cursor->data = msg->data;
> >
> > The issue takes place because of this:
> > Jan 6 14:59:24 ceph-testing-0001 kernel: [ 202.628853] libceph: pid
> > 144:net/ceph/messenger_v2.c:2034 prepare_sparse_read_data(): msg-
> > >data_length 33792, msg->sparse_read_total 36864
> > 1070 BUG_ON(length > msg->data_length);
> > msg->sparse_read_total 36864 > msg->data_length 33792
> >
> > The generic/397 test (xfstests) executes such steps:
> > (1) create encrypted files and directories;
> > (2) access the created files and folders with encryption key;
> > (3) access the created files and folders without encryption key.
> >
> > We have issue because there is alignment of length before calling
> > ceph_osdc_new_request():
> >
> > Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.522120] ceph: pid
> > 8931:fs/ceph/crypto.h:148 ceph_fscrypt_adjust_off_and_len(): ino
> > 0x100000009d5: encrypted 0x4000, len 33792
> > Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.523706] ceph: pid
> > 8931:fs/ceph/crypto.h:155 ceph_fscrypt_adjust_off_and_len(): ino
> > 0x100000009d5: encrypted 0x4000, len 36864
> >
> > This patch uses unaligned size for retrieving file's content
> > from OSD and, then, aligned the size of sparse extent in the
> > case if inode is encrypted.
>
> Hi Slava,
>
> This doesn't seem right to me because in the case where a file is
> accessed with encryption key the whole block needs to be read from the
> OSD, or decryption wouldn't work.  Do tests where fscrypt is fully
> engaged (i.e. encryption key and everything else is present) and
> encrypted file contents are checked pass with this patch?

I spent some time looking into this.  The fact that op->extent.length
is 36864 while msg->data_length is 33792 suggests that the issue is
with the (only) OSD data item added to the read request: the extent is
rounded to CEPH_FSCRYPT_BLOCK_SIZE, but the data item remains short.
Here is the code in question:

    if (IS_ENCRYPTED(inode)) {
            struct page **pages;
            size_t page_off;

            err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len,
                                            &page_off);
            if (err < 0) {
                    doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
                          ceph_vinop(inode), err);
                    goto out;
            }

            /* should always give us a page-aligned read */
            WARN_ON_ONCE(page_off);
            len = err;
            err = 0;

            osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
                                             false);

My guess is that iov_iter_get_pages_alloc2() ends up returning
33792 even though it's called with max_size set to 36864 (the value of
adjusted len prior to "len = err" assignment).  The reason for that is
likely that subreq->io_iter iterator is already truncated to 33792 at
that point because that must be value of subreq->len.  Then, following
"len = err" assignment osd_req_op_extent_osd_data_pages() is passed
33792 even though there is presumably a full page available.

This looks like a regression introduced with ee4cdf7ba857 ("netfs:
Speed up buffered reading") because prior to that commit CephFS had its
own iterator which was set up with adjusted len instead of subreq->len:

    iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
                    subreq->start, len);

I wonder if this would "magically" fix itself if we just comment out
"len = err" assignment.  Slava, can you try that?  It's not a proper
fix but it would nail down the root cause.

I'm not up to speed on netfslib, so adding David to keep me straight.

Thanks,

                Ilya
Viacheslav Dubeyko Jan. 16, 2025, 7:04 p.m. UTC | #3
On Thu, 2025-01-16 at 17:03 +0100, Ilya Dryomov wrote:
> On Thu, Jan 16, 2025 at 10:23 AM Ilya Dryomov <idryomov@gmail.com>
> wrote:
> > 
> > On Wed, Jan 15, 2025 at 1:49 AM Viacheslav Dubeyko
> > <Slava.Dubeyko@ibm.com> wrote:
> > > 
> > > Hello,
> > > 
> > > The generic/397 test generate kernel crash for the case of
> > > encrypted inode with unaligned file size (for example, 33K
> > > or 1K):
> > > 
> > > Jan 3 12:34:40 ceph-testing-0001 root: run xfstest generic/397
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.737811] run
> > > fstests
> > > generic/397 at 2025-01-03 12:34:40
> > > Jan 3 12:34:40 ceph-testing-0001 systemd1: Started /usr/bin/bash
> > > c test
> > > -w /proc/self/oom_score_adj && echo 250 >
> > > /proc/self/oom_score_adj;
> > > exec ./tests/generic/397.
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.875761] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.876130] libceph:
> > > client4614 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.991965] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 877.992334] libceph:
> > > client4617 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017234] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.017594] libceph:
> > > client4620 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.031394] xfs_io
> > > (pid
> > > 18988) is setting deprecated v1 encryption policy; recommend
> > > upgrading
> > > to v2.
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054528] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.054892] libceph:
> > > client4623 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070287] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:40 ceph-testing-0001 kernel: [ 878.070704] libceph:
> > > client4626 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.264586] libceph:
> > > mon0
> > > (2)127.0.0.1:40674 session established
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.265258] libceph:
> > > client4629 fsid 19b90bca-f1ae-47a6-93dd-0b03ee637949
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374578] ----------
> > > -[ cut
> > > here ]------------
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.374586] kernel BUG
> > > at
> > > net/ceph/messenger.c:1070!
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.375150] Oops:
> > > invalid
> > > opcode: 0000 [#1] PREEMPT SMP NOPTI
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378145] CPU: 2
> > > UID: 0
> > > PID: 4759 Comm: kworker/2:9 Not tainted 6.13.0-rc5+ #1
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.378969] Hardware
> > > name:
> > > QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-
> > > ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.380167] Workqueue:
> > > ceph-
> > > msgr ceph_con_workfn
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.381639] RIP:
> > > 0010:ceph_msg_data_cursor_init+0x42/0x50
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.382152] Code: 89
> > > 17 48
> > > 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff
> > > ff 5d
> > > 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e
> > > 0f 1f
> > > 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.383928] RSP:
> > > 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.384447] RAX:
> > > ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385129] RDX:
> > > 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.385839] RBP:
> > > ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.386539] R10:
> > > 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387203] R13:
> > > ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.387877] FS:
> > > 0000000000000000(0000) GS:ffff9814b7900000(0000)
> > > knlGS:0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.388663] CS: 0010
> > > DS:
> > > 0000 ES: 0000 CR0: 0000000080050033
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389212] CR2:
> > > 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.389921] DR0:
> > > 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.390620] DR3:
> > > 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391307] PKRU:
> > > 55555554
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391567] Call
> > > Trace:
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.391807] <TASK>
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392021] ?
> > > show_regs+0x71/0x90
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392391] ?
> > > die+0x38/0xa0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392667] ?
> > > do_trap+0xdb/0x100
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.392981] ?
> > > do_error_trap+0x75/0xb0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393372] ?
> > > ceph_msg_data_cursor_init+0x42/0x50
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.393842] ?
> > > exc_invalid_op+0x53/0x80
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394232] ?
> > > ceph_msg_data_cursor_init+0x42/0x50
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.394694] ?
> > > asm_exc_invalid_op+0x1b/0x20
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395099] ?
> > > ceph_msg_data_cursor_init+0x42/0x50
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.395583] ?
> > > ceph_con_v2_try_read+0xd16/0x2220
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396027] ?
> > > _raw_spin_unlock+0xe/0x40
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396428] ?
> > > raw_spin_rq_unlock+0x10/0x40
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.396842] ?
> > > finish_task_switch.isra.0+0x97/0x310
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397338] ?
> > > __schedule+0x44b/0x16b0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.397738]
> > > ceph_con_workfn+0x326/0x750
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398121]
> > > process_one_work+0x188/0x3d0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398522] ?
> > > __pfx_worker_thread+0x10/0x10
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.398929]
> > > worker_thread+0x2b5/0x3c0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399310] ?
> > > __pfx_worker_thread+0x10/0x10
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.399727]
> > > kthread+0xe1/0x120
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400031] ?
> > > __pfx_kthread+0x10/0x10
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400431]
> > > ret_from_fork+0x43/0x70
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.400771] ?
> > > __pfx_kthread+0x10/0x10
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401127]
> > > ret_from_fork_asm+0x1a/0x30
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401543] </TASK>
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.401760] Modules
> > > linked
> > > in: hctr2 nhpoly1305_avx2 nhpoly1305_sse2 nhpoly1305
> > > chacha_generic
> > > chacha_x86_64 libchacha adiantum libpoly1305 essiv authenc
> > > mptcp_diag
> > > xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag
> > > af_packet_diag
> > > netlink_diag intel_rapl_msr intel_rapl_common
> > > intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm
> > > crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic
> > > ghash_clmulni_intel sha256_ssse3 sha1_ssse3 aesni_intel joydev
> > > crypto_simd cryptd rapl input_leds psmouse sch_fq_codel serio_raw
> > > bochs
> > > i2c_piix4 floppy qemu_fw_cfg i2c_smbus mac_hid pata_acpi msr
> > > parport_pc
> > > ppdev lp parport efi_pstore ip_tables x_tables
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407319] ---[ end
> > > trace
> > > 0000000000000000 ]---
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.407775] RIP:
> > > 0010:ceph_msg_data_cursor_init+0x42/0x50
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.408317] Code: 89
> > > 17 48
> > > 8b 46 70 55 48 89 47 08 c7 47 18 00 00 00 00 48 89 e5 e8 de cc ff
> > > ff 5d
> > > 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 0f 0b <0f> 0b 0f 0b 66 2e
> > > 0f 1f
> > > 84 00 00 00 00 00 90 90 90 90 90 90 90 90
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410087] RSP:
> > > 0018:ffffb4ffc7cbbd28 EFLAGS: 00010287
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.410609] RAX:
> > > ffffffff82bb9ac0 RBX: ffff981390c2f1f8 RCX: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.411318] RDX:
> > > 0000000000009000 RSI: ffff981288232b58 RDI: ffff981390c2f378
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412014] RBP:
> > > ffffb4ffc7cbbe18 R08: 0000000000000000 R09: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.412735] R10:
> > > 0000000000000000 R11: 0000000000000000 R12: ffff981390c2f030
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.413438] R13:
> > > ffff981288232b58 R14: 0000000000000029 R15: 0000000000000001
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414121] FS:
> > > 0000000000000000(0000) GS:ffff9814b7900000(0000)
> > > knlGS:0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.414935] CS: 0010
> > > DS:
> > > 0000 ES: 0000 CR0: 0000000080050033
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.415516] CR2:
> > > 00005e106a0554e0 CR3: 0000000112bf0001 CR4: 0000000000772ef0
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416211] DR0:
> > > 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.416907] DR3:
> > > 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > Jan 3 12:34:41 ceph-testing-0001 kernel: [ 878.417630] PKRU:
> > > 55555554
> > > 
> > > BUG_ON(length > msg->data_length) triggers the issue:
> > > 
> > > (gdb) l *ceph_msg_data_cursor_init+0x42
> > > 0xffffffff823b45a2 is in ceph_msg_data_cursor_init
> > > (net/ceph/messenger.c:1070).
> > > 1065
> > > 1066 void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor
> > > *cursor,
> > > 1067 struct ceph_msg *msg, size_t length)
> > > 1068 {
> > > 1069 BUG_ON(!length);
> > > 1070 BUG_ON(length > msg->data_length);
> > > 1071 BUG_ON(!msg->num_data_items);
> > > 1072
> > > 1073 cursor->total_resid = length;
> > > 1074 cursor->data = msg->data;
> > > 
> > > The issue takes place because of this:
> > > Jan 6 14:59:24 ceph-testing-0001 kernel: [ 202.628853] libceph:
> > > pid
> > > 144:net/ceph/messenger_v2.c:2034 prepare_sparse_read_data(): msg-
> > > > data_length 33792, msg->sparse_read_total 36864
> > > 1070 BUG_ON(length > msg->data_length);
> > > msg->sparse_read_total 36864 > msg->data_length 33792
> > > 
> > > The generic/397 test (xfstests) executes such steps:
> > > (1) create encrypted files and directories;
> > > (2) access the created files and folders with encryption key;
> > > (3) access the created files and folders without encryption key.
> > > 
> > > We have issue because there is alignment of length before calling
> > > ceph_osdc_new_request():
> > > 
> > > Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.522120] ceph: pid
> > > 8931:fs/ceph/crypto.h:148 ceph_fscrypt_adjust_off_and_len(): ino
> > > 0x100000009d5: encrypted 0x4000, len 33792
> > > Jan 8 12:49:22 ceph-testing-0001 kernel: [ 301.523706] ceph: pid
> > > 8931:fs/ceph/crypto.h:155 ceph_fscrypt_adjust_off_and_len(): ino
> > > 0x100000009d5: encrypted 0x4000, len 36864
> > > 
> > > This patch uses unaligned size for retrieving file's content
> > > from OSD and, then, aligned the size of sparse extent in the
> > > case if inode is encrypted.
> > 
> > Hi Slava,
> > 
> > This doesn't seem right to me because in the case where a file is
> > accessed with encryption key the whole block needs to be read from
> > the
> > OSD, or decryption wouldn't work.  Do tests where fscrypt is fully
> > engaged (i.e. encryption key and everything else is present) and
> > encrypted file contents are checked pass with this patch?
> 
> I spent some time looking into this.  The fact that op->extent.length
> is 36864 while msg->data_length is 33792 suggests that the issue is
> with the (only) OSD data item added to the read request: the extent
> is
> rounded to CEPH_FSCRYPT_BLOCK_SIZE, but the data item remains short.
> Here is the code in question:
> 
>     if (IS_ENCRYPTED(inode)) {
>             struct page **pages;
>             size_t page_off;
> 
>             err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages,
> len,
>                                             &page_off);
>             if (err < 0) {
>                     doutc(cl, "%llx.%llx failed to allocate pages,
> %d\n",
>                           ceph_vinop(inode), err);
>                     goto out;
>             }
> 
>             /* should always give us a page-aligned read */
>             WARN_ON_ONCE(page_off);
>             len = err;
>             err = 0;
> 
>             osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
> false,
>                                              false);
> 
> My guess is that iov_iter_get_pages_alloc2() ends up returning
> 33792 even though it's called with max_size set to 36864 (the value
> of
> adjusted len prior to "len = err" assignment).  The reason for that
> is
> likely that subreq->io_iter iterator is already truncated to 33792 at
> that point because that must be value of subreq->len.  Then,
> following
> "len = err" assignment osd_req_op_extent_osd_data_pages() is passed
> 33792 even though there is presumably a full page available.
> 
> This looks like a regression introduced with ee4cdf7ba857 ("netfs:
> Speed up buffered reading") because prior to that commit CephFS had
> its
> own iterator which was set up with adjusted len instead of subreq-
> >len:
> 
>     iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
>                     subreq->start, len);
> 
> I wonder if this would "magically" fix itself if we just comment out
> "len = err" assignment.  Slava, can you try that?  It's not a proper
> fix but it would nail down the root cause.
> 

Let me try this. I also not really like the suggested fix. So, we need
to elaborate the proper solution for this.

Thanks,
Slava.
David Howells Jan. 17, 2025, 9:29 p.m. UTC | #4
Note that netfslib provides some tracepoints that you can use to try and
illuminate what's going on there.

echo 1 > /sys/kernel/debug/tracing/events/netfs/netfs_read/enable
echo 1 > /sys/kernel/debug/tracing/events/netfs/netfs_rreq/enable
echo 1 > /sys/kernel/debug/tracing/events/netfs/netfs_sreq/enable
echo 1 > /sys/kernel/debug/tracing/events/netfs/netfs_failure/enable

for starters.

David
diff mbox series

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 85936f6d2bf7..5a7a698ad8e8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -235,6 +235,15 @@  static void finish_netfs_read(struct
ceph_osd_request *req)
 		    subreq->rreq->origin != NETFS_DIO_READ)
 			__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq-
>flags);
 		if (IS_ENCRYPTED(inode) && err > 0) {
+			struct ceph_sparse_extent *ext;
+			int i;
+
+			for (i = 0; i < op->extent.sparse_ext_cnt;
i++) {
+				ext = &op->extent.sparse_ext[i];
+				ceph_fscrypt_adjust_off_and_len(inode,
+						&ext->off, &ext->len);
+			}
+
 			err = ceph_fscrypt_decrypt_extents(inode,
 					osd_data->pages, subreq-
>start,
 					op->extent.sparse_ext,
@@ -357,13 +366,12 @@  static void ceph_netfs_issue_read(struct
netfs_io_subrequest *subreq)
 	if (ceph_has_inline_data(ci) &&
ceph_netfs_issue_op_inline(subreq))
 		return;
 
-	// TODO: This rounding here is slightly dodgy.  It *should*
work, for
-	// now, as the cache only deals in blocks that are a multiple
of
-	// PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE.  What
needs to
-	// happen is for the fscrypt driving to be moved into netfslib
and the
-	// data in the cache also to be stored encrypted.
+	/*
+	 * It needs to provide the real legth (not aligned) for
network subsystem.
+	 * Otherwise, ceph_msg_data_cursor_init() triggers BUG_ON() in
the case
+	 * if msg->sparse_read_total > msg->data_length.
+	 */
 	len = subreq->len;
-	ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
 
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
vino,
 			off, &len, 0, 1, sparse ?
CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
@@ -375,6 +383,16 @@  static void ceph_netfs_issue_read(struct
netfs_io_subrequest *subreq)
 		goto out;
 	}
 
+	// TODO: This rounding here is slightly dodgy.  It *should*
work, for
+	// now, as the cache only deals in blocks that are a multiple
of
+	// PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE.  What
needs to
+	// happen is for the fscrypt driving to be moved into netfslib
and the
+	// data in the cache also to be stored encrypted.
+	ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
+
+	doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
+	      ceph_vinop(inode), subreq->start, subreq->len, len);
+
 	if (sparse) {
 		extent_cnt = __ceph_sparse_read_ext_count(inode, len);
 		err = ceph_alloc_sparse_ext_map(&req->r_ops[0],
extent_cnt);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 851d70200c6b..9446a5f001aa 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1093,12 +1093,15 @@  ssize_t __ceph_sync_read(struct inode *inode,
loff_t *ki_pos,
 		u64 read_len = len;
 		int extent_cnt;
 
-		/* determine new offset/length if encrypted */
-		ceph_fscrypt_adjust_off_and_len(inode, &read_off,
&read_len);
-
 		doutc(cl, "orig %llu~%llu reading %llu~%llu", off,
len,
 		      read_off, read_len);
 
+		/*
+		 * It needs to provide the real legth (not aligned)
for network subsystem.
+		 * Otherwise, ceph_msg_data_cursor_init() triggers
BUG_ON() in the case
+		 * if msg->sparse_read_total > msg->data_length.