[RFC,v2] iov_iter: Convert iterate*() to inline funcs

Message ID 855.1692047347@warthog.procyon.org.uk (mailing list archive)
State New, archived
Series [RFC,v2] iov_iter: Convert iterate*() to inline funcs

Commit Message

David Howells Aug. 14, 2023, 9:09 p.m. UTC
Convert the iov_iter iteration macros to inline functions to make the code
easier to follow.  Ideally, the optimiser would produce much the same code
in both cases, but the revised code ends up a bit bigger.

The changes in compiled function size on x86_64 look like:

        __export_symbol_iov_iter_init            inc 0x3 -> 0x8 +0x5
        _copy_from_iter                          inc 0x36e -> 0x395 +0x27
        _copy_from_iter_flushcache               inc 0x359 -> 0x364 +0xb
        _copy_from_iter_nocache                  dcr 0x36a -> 0x33e -0x2c
        _copy_mc_to_iter                         dcr 0x3a7 -> 0x38f -0x18
        _copy_to_iter                            dcr 0x358 -> 0x330 -0x28
        copy_from_user_iter.isra.0               new 0x32
        copy_page_from_iter_atomic.part.0        inc 0x3cf -> 0x3f3 +0x24
        copy_page_to_iter_nofault.part.0         dcr 0x3f1 -> 0x3a9 -0x48
        copy_to_user_iter.constprop.0            new 0x32
        copy_to_user_iter_mc.constprop.0         new 0x2c
        copyin                                   del 0x30
        copyout                                  del 0x2d
        copyout_mc                               del 0x2b
        csum_and_copy_from_iter                  dcr 0x3e8 -> 0x3e5 -0x3
        csum_and_copy_to_iter                    dcr 0x46a -> 0x446 -0x24
        iov_iter_zero                            dcr 0x34f -> 0x338 -0x17
        memcpy_from_iter.isra.0                  del 0x1f
        memcpy_from_iter_mc                      new 0x2b

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Alexander Viro <viro@zeniv.linux.org.uk>
cc: Jens Axboe <axboe@kernel.dk>
cc: Christoph Hellwig <hch@lst.de>
cc: Christian Brauner <christian@brauner.io>
cc: Matthew Wilcox <willy@infradead.org>
cc: Linus Torvalds <torvalds@linux-foundation.org>
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3710261.1691764329@warthog.procyon.org.uk/ # v1
---
ver #2)
 - Rebased on top of Willy's changes in linux-next.
 - Change the checksum argument to the iteration functions to be a general
   void* and use it to pass iter->copy_mc flag to memcpy_from_iter_mc() to
   avoid using a function pointer.
 - Arrange the end of the iterate_*() functions to look the same to give
   the optimiser the best chance.
 - Make iterate_and_advance() a wrapper around iterate_and_advance2().
 - Adjust iterate_and_advance2() to use if-else-if-else-if-else rather than
   switch(), to put ITER_BVEC before KVEC and to mark UBUF and IOVEC as
   likely().
 - Move "iter->count += progress" into iterate_and_advance2() from the
   iterate functions.
 - Mark a number of the iterator helpers with __always_inline.
 - Fix _copy_from_iter_flushcache() to use memcpy_from_iter_flushcache()
   not memcpy_from_iter().

 lib/iov_iter.c |  605 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 365 insertions(+), 240 deletions(-)

Comments

David Howells Aug. 14, 2023, 9:40 p.m. UTC | #1
>         _copy_from_iter                          inc 0x36e -> 0x395 +0x27

Here is a disassembly of _copy_from_iter(), unpatched and patched, marked up
with the different iterator-type branches.  To summarise:

		UNPATCHED	PATCHED
		START	LEN	START	LEN
		=======	=======	=======	=======
Prologue	0	77	0	76
UBUF		77	36	76	36
IOVEC		113	148	112	105
BVEC		261	159	217	163
KVEC		420	125	380	116
XARRAY		545	286	496	374
DISCARD/Epi	831	42	870	42
Return		873	-	912	-


The overall change in the entire file, according to the size(1) utility, is:
    text    data     bss     dec     hex filename
   19855     744       0   20599    5077 build3/lib/iov_iter.o -- before
   19739     864       0   20603    507b build3/lib/iov_iter.o -- after

David
UNPATCHED
<+0>:	push   %r15
<+2>:	push   %r14
<+4>:	push   %r13
<+6>:	push   %r12
<+8>:	push   %rbp
<+9>:	push   %rbx
<+10>:	sub    $0x50,%rsp
<+14>:	mov    %rdi,(%rsp)
<+18>:	mov    %gs:0x28,%rax
<+27>:	mov    %rax,0x48(%rsp)
<+32>:	xor    %eax,%eax
<+34>:	cmpb   $0x0,0x3(%rdx)
<+38>:	je     0xffffffff81779593 <_copy_from_iter+67>
<+40>:	mov    0x18(%rdx),%rax
<+44>:	mov    %rdx,%r15
<+47>:	cmp    %rax,%rsi
<+50>:	cmovbe %rsi,%rax
<+54>:	test   %rax,%rax
<+57>:	mov    %rax,%r13
<+60>:	jne    0xffffffff8177959d <_copy_from_iter+77>
<+62>:	jmp    0xffffffff81779893 <_copy_from_iter+835>
<+67>:	ud2
<+69>:	xor    %r13d,%r13d
<+72>:	jmp    0xffffffff81779893 <_copy_from_iter+835>

# ITER_UBUF
<+77>:	mov    (%rdx),%al
<+79>:	cmp    $0x5,%al
<+81>:	jne    0xffffffff817795c1 <_copy_from_iter+113>
<+83>:	mov    0x8(%rdx),%rsi
<+87>:	mov    (%rsp),%rdi
<+91>:	add    0x10(%rdx),%rsi
<+95>:	mov    %r13,%rdx
<+98>:	call   0xffffffff81777905 <copyin>
<+103>:	cltq
<+105>:	sub    %rax,%r13
<+108>:	jmp    0xffffffff8177988b <_copy_from_iter+827>

# ITER_IOVEC
<+113>:	test   %al,%al
<+115>:	jne    0xffffffff81779655 <_copy_from_iter+261>
<+121>:	mov    0x10(%rdx),%rbp
<+125>:	xor    %r12d,%r12d
<+128>:	mov    0x8(%rdx),%rbx
<+132>:	mov    0x8(%rbp),%rdx
<+136>:	sub    %rbx,%rdx
<+139>:	cmp    %r13,%rdx
<+142>:	cmova  %r13,%rdx
<+146>:	test   %rdx,%rdx
<+149>:	je     0xffffffff81779619 <_copy_from_iter+201>
<+151>:	mov    0x0(%rbp),%rsi
<+155>:	mov    (%rsp),%rax
<+159>:	add    %rbx,%rsi
<+162>:	lea    (%rax,%r12,1),%rdi
<+166>:	call   0xffffffff81777905 <copyin>
<+171>:	mov    %rdx,%rcx
<+174>:	mov    %r13,%r8
<+177>:	cltq
<+179>:	sub    %rdx,%r8
<+182>:	lea    (%r8,%rax,1),%r13
<+186>:	sub    %rax,%rcx
<+189>:	add    %rcx,%r12
<+192>:	add    %rcx,%rbx
<+195>:	cmp    0x8(%rbp),%rbx
<+199>:	jb     0xffffffff81779626 <_copy_from_iter+214>
<+201>:	add    $0x10,%rbp
<+205>:	xor    %ebx,%ebx
<+207>:	test   %r13,%r13
<+210>:	jne    0xffffffff817795d4 <_copy_from_iter+132>
<+212>:	jmp    0xffffffff81779629 <_copy_from_iter+217>
<+214>:	mov    %rbx,%r13
<+217>:	cmpb   $0x5,(%r15)
<+221>:	mov    %r13,0x8(%r15)
<+225>:	lea    0x10(%r15),%rdx
<+229>:	je     0xffffffff8177963b <_copy_from_iter+235>
<+231>:	mov    0x10(%r15),%rdx
<+235>:	mov    %rbp,%rax
<+238>:	mov    %rbp,0x10(%r15)
<+242>:	mov    %r12,%r13
<+245>:	sub    %rdx,%rax
<+248>:	sar    $0x4,%rax
<+252>:	sub    %rax,0x20(%r15)
<+256>:	jmp    0xffffffff8177988f <_copy_from_iter+831>

# ITER_BVEC
<+261>:	cmp    $0x2,%al
<+263>:	jne    0xffffffff817796f4 <_copy_from_iter+420>
<+269>:	mov    0x10(%rdx),%r14
<+273>:	xor    %ebp,%ebp
<+275>:	mov    0x8(%rdx),%r12d
<+279>:	mov    0xc(%r14),%ecx
<+283>:	add    %r12d,%ecx
<+286>:	mov    %ecx,%edi
<+288>:	and    $0xfff,%ecx
<+294>:	shr    $0xc,%edi
<+297>:	shl    $0x6,%rdi
<+301>:	add    (%r14),%rdi
<+304>:	call   0xffffffff81777282 <kmap_local_page>
<+309>:	mov    0x8(%r14),%ebx
<+313>:	mov    $0x1000,%edx
<+318>:	mov    0x1(%r15),%dil
<+322>:	sub    %r12d,%ebx
<+325>:	cmp    %r13,%rbx
<+328>:	cmova  %r13,%rbx
<+332>:	sub    %rcx,%rdx
<+335>:	cmp    %rdx,%rbx
<+338>:	cmova  %rdx,%rbx
<+342>:	lea    (%rax,%rcx,1),%rdx
<+346>:	mov    (%rsp),%rax
<+350>:	mov    %rbx,%rcx
<+353>:	add    %ebx,%r12d
<+356>:	lea    (%rax,%rbp,1),%rsi
<+360>:	add    %rbx,%rbp
<+363>:	call   0xffffffff81779531 <memcpy_from_iter>
<+368>:	cmp    %r12d,0x8(%r14)
<+372>:	jne    0xffffffff817796cd <_copy_from_iter+381>
<+374>:	add    $0x10,%r14
<+378>:	xor    %r12d,%r12d
<+381>:	sub    %rbx,%r13
<+384>:	jne    0xffffffff81779667 <_copy_from_iter+279>
<+386>:	mov    %r12d,%eax
<+389>:	mov    %rbp,%r13
<+392>:	mov    %rax,0x8(%r15)
<+396>:	mov    %r14,%rax
<+399>:	sub    0x10(%r15),%rax
<+403>:	mov    %r14,0x10(%r15)
<+407>:	sar    $0x4,%rax
<+411>:	sub    %rax,0x20(%r15)
<+415>:	jmp    0xffffffff8177988f <_copy_from_iter+831>

# ITER_KVEC
<+420>:	cmp    $0x1,%al
<+422>:	jne    0xffffffff81779771 <_copy_from_iter+545>
<+424>:	mov    0x10(%rdx),%r12
<+428>:	xor    %r14d,%r14d
<+431>:	mov    0x8(%rdx),%rbp
<+435>:	mov    0x8(%r12),%rbx
<+440>:	sub    %rbp,%rbx
<+443>:	cmp    %r13,%rbx
<+446>:	cmova  %r13,%rbx
<+450>:	test   %rbx,%rbx
<+453>:	je     0xffffffff81779742 <_copy_from_iter+498>
<+455>:	mov    (%r12),%rdx
<+459>:	mov    %rbx,%rcx
<+462>:	sub    %rbx,%r13
<+465>:	mov    (%rsp),%rax
<+469>:	mov    0x1(%r15),%dil
<+473>:	add    %rbp,%rdx
<+476>:	add    %rbx,%rbp
<+479>:	lea    (%rax,%r14,1),%rsi
<+483>:	add    %rbx,%r14
<+486>:	call   0xffffffff81779531 <memcpy_from_iter>
<+491>:	cmp    0x8(%r12),%rbp
<+496>:	jb     0xffffffff8177974f <_copy_from_iter+511>
<+498>:	add    $0x10,%r12
<+502>:	xor    %ebp,%ebp
<+504>:	test   %r13,%r13
<+507>:	jne    0xffffffff81779703 <_copy_from_iter+435>
<+509>:	jmp    0xffffffff81779752 <_copy_from_iter+514>
<+511>:	mov    %rbp,%r13
<+514>:	mov    %r12,%rax
<+517>:	sub    0x10(%r15),%rax
<+521>:	mov    %r13,0x8(%r15)
<+525>:	mov    %r14,%r13
<+528>:	mov    %r12,0x10(%r15)
<+532>:	sar    $0x4,%rax
<+536>:	sub    %rax,0x20(%r15)
<+540>:	jmp    0xffffffff8177988f <_copy_from_iter+831>

# ITER_XARRAY
<+545>:	cmp    $0x3,%al
<+547>:	jne    0xffffffff8177988f <_copy_from_iter+831>
<+553>:	mov    0x10(%rdx),%rax
<+557>:	mov    $0x1000,%ebp
<+562>:	movq   $0x3,0x28(%rsp)
<+571>:	mov    0x8(%rdx),%r14
<+575>:	add    0x20(%rdx),%r14
<+579>:	xor    %edx,%edx
<+581>:	mov    %rdx,0x30(%rsp)
<+586>:	mov    %rax,0x10(%rsp)
<+591>:	mov    %rdx,0x38(%rsp)
<+596>:	mov    %r14,%rax
<+599>:	mov    %rdx,0x40(%rsp)
<+604>:	shr    $0xc,%rax
<+608>:	mov    %rax,0x18(%rsp)
<+613>:	xor    %eax,%eax
<+615>:	mov    %eax,0x20(%rsp)
<+619>:	mov    %r14,%rax
<+622>:	and    $0xfff,%eax
<+627>:	sub    %rax,%rbp
<+630>:	call   0xffffffff81126bcf <__rcu_read_lock>
<+635>:	or     $0xffffffffffffffff,%rsi
<+639>:	lea    0x10(%rsp),%rdi
<+644>:	call   0xffffffff81d5edaf <xas_find>
<+649>:	mov    %r13,0x8(%rsp)
<+654>:	mov    %rax,%rbx
<+657>:	xor    %r13d,%r13d
<+660>:	test   %rbx,%rbx
<+663>:	je     0xffffffff81779886 <_copy_from_iter+822>
<+669>:	lea    0x10(%rsp),%rdi
<+674>:	mov    %rbx,%rsi
<+677>:	call   0xffffffff81777300 <xas_retry>
<+682>:	test   %al,%al
<+684>:	jne    0xffffffff81779874 <_copy_from_iter+804>
<+686>:	test   $0x1,%bl
<+689>:	jne    0xffffffff81779824 <_copy_from_iter+724>
<+691>:	mov    %rbx,%rdi
<+694>:	call   0xffffffff81270e56 <folio_test_hugetlb>
<+699>:	test   %al,%al
<+701>:	jne    0xffffffff81779824 <_copy_from_iter+724>
<+703>:	lea    (%r14,%r13,1),%rdx
<+707>:	mov    %rbx,%rdi
<+710>:	call   0xffffffff81777266 <folio_size>
<+715>:	lea    -0x1(%rax),%r12
<+719>:	and    %rdx,%r12
<+722>:	jmp    0xffffffff81779867 <_copy_from_iter+791>
<+724>:	ud2
<+726>:	jmp    0xffffffff81779886 <_copy_from_iter+822>
<+728>:	mov    %r12,%rsi
<+731>:	mov    %rbx,%rdi
<+734>:	call   0xffffffff817772a7 <kmap_local_folio>
<+739>:	mov    0x1(%r15),%dil
<+743>:	mov    %rax,%rdx
<+746>:	mov    (%rsp),%rax
<+750>:	cmp    %rbp,0x8(%rsp)
<+755>:	cmovbe 0x8(%rsp),%rbp
<+761>:	mov    %rbp,%rcx
<+764>:	lea    (%rax,%r13,1),%rsi
<+768>:	add    %rbp,%r13
<+771>:	call   0xffffffff81779531 <memcpy_from_iter>
<+776>:	sub    %rbp,0x8(%rsp)
<+781>:	je     0xffffffff81779886 <_copy_from_iter+822>
<+783>:	add    %rbp,%r12
<+786>:	mov    $0x1000,%ebp
<+791>:	mov    %rbx,%rdi
<+794>:	call   0xffffffff81777266 <folio_size>
<+799>:	cmp    %rax,%r12
<+802>:	jb     0xffffffff81779828 <_copy_from_iter+728>
<+804>:	lea    0x10(%rsp),%rdi
<+809>:	call   0xffffffff81777d61 <xas_next_entry>
<+814>:	mov    %rax,%rbx
<+817>:	jmp    0xffffffff817797e4 <_copy_from_iter+660>
<+822>:	call   0xffffffff811299a3 <__rcu_read_unlock>
<+827>:	add    %r13,0x8(%r15)

# ITER_DISCARD / default
<+831>:	sub    %r13,0x18(%r15)
<+835>:	mov    0x48(%rsp),%rax
<+840>:	sub    %gs:0x28,%rax
<+849>:	je     0xffffffff817798a8 <_copy_from_iter+856>
<+851>:	call   0xffffffff81d657ac <__stack_chk_fail>
<+856>:	add    $0x50,%rsp
<+860>:	mov    %r13,%rax
<+863>:	pop    %rbx
<+864>:	pop    %rbp
<+865>:	pop    %r12
<+867>:	pop    %r13
<+869>:	pop    %r14
<+871>:	pop    %r15
<+873>:	jmp    0xffffffff81d72960 <__x86_return_thunk>
PATCHED
<+0>:	push   %r15
<+2>:	push   %r14
<+4>:	push   %r13
<+6>:	push   %r12
<+8>:	push   %rbp
<+9>:	push   %rbx
<+10>:	sub    $0x68,%rsp
<+14>:	mov    %rdi,(%rsp)
<+18>:	mov    %gs:0x28,%rax
<+27>:	mov    %rax,0x60(%rsp)
<+32>:	xor    %eax,%eax
<+34>:	cmpb   $0x0,0x3(%rdx)
<+38>:	je     0xffffffff8177931e <_copy_from_iter+67>
<+40>:	mov    0x18(%rdx),%rax
<+44>:	mov    %rdx,%r15
<+47>:	cmp    %rax,%rsi
<+50>:	cmovbe %rsi,%rax
<+54>:	test   %rax,%rax
<+57>:	mov    %rax,%rbx
<+60>:	jne    0xffffffff81779327 <_copy_from_iter+76>
<+62>:	jmp    0xffffffff81779645 <_copy_from_iter+874>
<+67>:	ud2
<+69>:	xor    %ebx,%ebx
<+71>:	jmp    0xffffffff81779645 <_copy_from_iter+874>

# ITER_UBUF
<+76>:	mov    (%rdx),%al
<+78>:	cmp    $0x5,%al
<+80>:	jne    0xffffffff8177934b <_copy_from_iter+112>
<+82>:	mov    0x8(%rdx),%rdi
<+86>:	xor    %esi,%esi
<+88>:	add    0x10(%rdx),%rdi
<+92>:	mov    %rbx,%rdx
<+95>:	mov    (%rsp),%rcx
<+99>:	call   0xffffffff81778207 <copy_from_user_iter>
<+104>:	sub    %rax,%rbx
<+107>:	jmp    0xffffffff8177963d <_copy_from_iter+866>

# ITER_IOVEC
<+112>:	test   %al,%al
<+114>:	jne    0xffffffff817793b4 <_copy_from_iter+217>
<+116>:	mov    0x10(%rdx),%r13
<+120>:	xor    %r14d,%r14d
<+123>:	mov    0x8(%rdx),%r12
<+127>:	mov    0x8(%r13),%rbp
<+131>:	sub    %r12,%rbp
<+134>:	cmp    %rbx,%rbp
<+137>:	cmova  %rbx,%rbp
<+141>:	test   %rbp,%rbp
<+144>:	je     0xffffffff8177939b <_copy_from_iter+192>
<+146>:	mov    0x0(%r13),%rdi
<+150>:	mov    %rbp,%rdx
<+153>:	mov    %r14,%rsi
<+156>:	sub    %rbp,%rbx
<+159>:	mov    (%rsp),%rcx
<+163>:	add    %r12,%rdi
<+166>:	call   0xffffffff81778207 <copy_from_user_iter>
<+171>:	mov    %rbp,%rdx
<+174>:	sub    %rax,%rdx
<+177>:	add    %rax,%rbx
<+180>:	add    %rdx,%r14
<+183>:	add    %r12,%rdx
<+186>:	cmp    0x8(%r13),%rdx
<+190>:	jb     0xffffffff817793ac <_copy_from_iter+209>
<+192>:	add    $0x10,%r13
<+196>:	xor    %r12d,%r12d
<+199>:	test   %rbx,%rbx
<+202>:	jne    0xffffffff8177935a <_copy_from_iter+127>
<+204>:	jmp    0xffffffff817794bb <_copy_from_iter+480>
<+209>:	mov    %rdx,%rbx
<+212>:	jmp    0xffffffff817794bb <_copy_from_iter+480>

# ITER_BVEC
<+217>:	cmp    $0x2,%al
<+219>:	jne    0xffffffff81779457 <_copy_from_iter+380>
<+225>:	mov    0x10(%rdx),%r13
<+229>:	xor    %r12d,%r12d
<+232>:	mov    0x8(%rdx),%r14
<+236>:	mov    0xc(%r13),%ecx
<+240>:	mov    %r12,%rsi
<+243>:	mov    %r15,%r8
<+246>:	add    %r14,%rcx
<+249>:	mov    %rcx,%rdi
<+252>:	and    $0xfff,%ecx
<+258>:	shr    $0xc,%rdi
<+262>:	shl    $0x6,%rdi
<+266>:	add    0x0(%r13),%rdi
<+270>:	call   0xffffffff81777282 <kmap_local_page>
<+275>:	mov    0x8(%r13),%ebp
<+279>:	mov    $0x1000,%edx
<+284>:	lea    (%rax,%rcx,1),%rdi
<+288>:	sub    %r14,%rbp
<+291>:	cmp    %rbx,%rbp
<+294>:	cmova  %rbx,%rbp
<+298>:	sub    %rcx,%rdx
<+301>:	mov    (%rsp),%rcx
<+305>:	cmp    %rdx,%rbp
<+308>:	cmova  %rdx,%rbp
<+312>:	mov    %rbp,%rdx
<+315>:	call   0xffffffff81777934 <memcpy_from_iter_mc>
<+320>:	mov    %rbp,%rdx
<+323>:	sub    %rax,%rdx
<+326>:	add    %rax,%rbx
<+329>:	add    %rdx,%r12
<+332>:	add    %rdx,%r14
<+335>:	mov    0x8(%r13),%edx
<+339>:	sub    %rbp,%rbx
<+342>:	cmp    %rdx,%r14
<+345>:	jb     0xffffffff8177943d <_copy_from_iter+354>
<+347>:	add    $0x10,%r13
<+351>:	xor    %r14d,%r14d
<+354>:	test   %rax,%rax
<+357>:	jne    0xffffffff81779447 <_copy_from_iter+364>
<+359>:	test   %rbx,%rbx
<+362>:	jne    0xffffffff817793c7 <_copy_from_iter+236>
<+364>:	mov    %r13,0x10(%r15)
<+368>:	mov    %r12,%rbx
<+371>:	mov    %r14,0x8(%r15)
<+375>:	jmp    0xffffffff81779641 <_copy_from_iter+870>

# ITER_KVEC
<+380>:	cmp    $0x1,%al
<+382>:	jne    0xffffffff817794cb <_copy_from_iter+496>
<+384>:	mov    0x10(%rdx),%r13
<+388>:	xor    %r14d,%r14d
<+391>:	mov    0x8(%rdx),%r12
<+395>:	mov    0x8(%r13),%rbp
<+399>:	sub    %r12,%rbp
<+402>:	cmp    %rbx,%rbp
<+405>:	cmova  %rbx,%rbp
<+409>:	test   %rbp,%rbp
<+412>:	je     0xffffffff817794aa <_copy_from_iter+463>
<+414>:	mov    0x0(%r13),%rdi
<+418>:	mov    %rbp,%rdx
<+421>:	mov    %r14,%rsi
<+424>:	mov    %r15,%r8
<+427>:	mov    (%rsp),%rcx
<+431>:	sub    %rbp,%rbx
<+434>:	add    %r12,%rdi
<+437>:	call   0xffffffff81777934 <memcpy_from_iter_mc>
<+442>:	mov    %rbp,%rdx
<+445>:	sub    %rax,%rdx
<+448>:	add    %rax,%rbx
<+451>:	add    %rdx,%r14
<+454>:	add    %rdx,%r12
<+457>:	cmp    0x8(%r13),%r12
<+461>:	jb     0xffffffff817794b8 <_copy_from_iter+477>
<+463>:	add    $0x10,%r13
<+467>:	xor    %r12d,%r12d
<+470>:	test   %rbx,%rbx
<+473>:	jne    0xffffffff81779466 <_copy_from_iter+395>
<+475>:	jmp    0xffffffff817794bb <_copy_from_iter+480>
<+477>:	mov    %r12,%rbx
<+480>:	mov    %rbx,0x8(%r15)
<+484>:	mov    %r14,%rbx
<+487>:	mov    %r13,0x10(%r15)
<+491>:	jmp    0xffffffff81779641 <_copy_from_iter+870>

# ITER_XARRAY
<+496>:	cmp    $0x3,%al
<+498>:	jne    0xffffffff81779641 <_copy_from_iter+870>
<+504>:	movq   $0x3,0x40(%rsp)
<+513>:	mov    0x10(%rdx),%rax
<+517>:	mov    0x8(%rdx),%r13
<+521>:	add    0x20(%rdx),%r13
<+525>:	xor    %edx,%edx
<+527>:	mov    %rdx,0x48(%rsp)
<+532>:	mov    %rax,0x28(%rsp)
<+537>:	mov    %rdx,0x50(%rsp)
<+542>:	mov    %r13,%rax
<+545>:	mov    %rdx,0x58(%rsp)
<+550>:	shr    $0xc,%rax
<+554>:	mov    %rax,0x30(%rsp)
<+559>:	xor    %eax,%eax
<+561>:	mov    %eax,0x38(%rsp)
<+565>:	call   0xffffffff81126bcf <__rcu_read_lock>
<+570>:	or     $0xffffffffffffffff,%rsi
<+574>:	lea    0x28(%rsp),%rdi
<+579>:	call   0xffffffff81d5ed2f <xas_find>
<+584>:	xor    %ecx,%ecx
<+586>:	mov    %rax,%rbp
<+589>:	mov    %rcx,0x8(%rsp)
<+594>:	test   %rbp,%rbp
<+597>:	je     0xffffffff81779589 <_copy_from_iter+686>
<+599>:	lea    0x28(%rsp),%rdi
<+604>:	mov    %rbp,%rsi
<+607>:	call   0xffffffff81777300 <xas_retry>
<+612>:	test   %al,%al
<+614>:	jne    0xffffffff81779626 <_copy_from_iter+843>
<+620>:	test   $0x1,%ebp
<+626>:	jne    0xffffffff81779587 <_copy_from_iter+684>
<+628>:	mov    %rbp,%rdi
<+631>:	call   0xffffffff81270e56 <folio_test_hugetlb>
<+636>:	test   %al,%al
<+638>:	jne    0xffffffff81779593 <_copy_from_iter+696>
<+640>:	mov    %rbp,%rdi
<+643>:	mov    %rbx,%r11
<+646>:	call   0xffffffff81777266 <folio_size>
<+651>:	lea    -0x1(%rax),%r12
<+655>:	call   0xffffffff81777266 <folio_size>
<+660>:	and    %r13,%r12
<+663>:	sub    %r12,%rax
<+666>:	cmp    %rbx,%rax
<+669>:	cmova  %rbx,%rax
<+673>:	mov    %rax,%r9
<+676>:	mov    %rax,%r14
<+679>:	jmp    0xffffffff81779617 <_copy_from_iter+828>
<+684>:	ud2
<+686>:	mov    0x8(%rsp),%rbx
<+691>:	jmp    0xffffffff81779638 <_copy_from_iter+861>
<+696>:	ud2
<+698>:	jmp    0xffffffff81779589 <_copy_from_iter+686>
<+700>:	mov    %r12,%rsi
<+703>:	mov    %rbp,%rdi
<+706>:	mov    %r11,0x20(%rsp)
<+711>:	mov    %r15,%r8
<+714>:	mov    %r9,0x18(%rsp)
<+719>:	call   0xffffffff817772a7 <kmap_local_folio>
<+724>:	mov    $0x1000,%edx
<+729>:	mov    (%rsp),%rcx
<+733>:	mov    %rax,%rdi
<+736>:	mov    %r12,%rax
<+739>:	mov    0x8(%rsp),%rsi
<+744>:	and    $0xfff,%eax
<+749>:	sub    %rax,%rdx
<+752>:	cmp    %r14,%rdx
<+755>:	cmova  %r14,%rdx
<+759>:	mov    %rdx,0x10(%rsp)
<+764>:	call   0xffffffff81777934 <memcpy_from_iter_mc>
<+769>:	mov    0x10(%rsp),%rdx
<+774>:	mov    0x8(%rsp),%rbx
<+779>:	mov    %rax,%rsi
<+782>:	mov    0x20(%rsp),%r11
<+787>:	mov    %rdx,%rcx
<+790>:	sub    %rdx,%rsi
<+793>:	sub    %rax,%rcx
<+796>:	add    %rcx,%rbx
<+799>:	add    %rsi,%r11
<+802>:	test   %rax,%rax
<+805>:	jne    0xffffffff81779638 <_copy_from_iter+861>
<+807>:	test   %r11,%r11
<+810>:	je     0xffffffff81779638 <_copy_from_iter+861>
<+812>:	mov    0x18(%rsp),%r9
<+817>:	add    %rsi,%r14
<+820>:	add    %rcx,%r12
<+823>:	mov    %rbx,0x8(%rsp)
<+828>:	test   %r14,%r14
<+831>:	jne    0xffffffff81779597 <_copy_from_iter+700>
<+837>:	add    %r9,%r13
<+840>:	mov    %r11,%rbx
<+843>:	lea    0x28(%rsp),%rdi
<+848>:	call   0xffffffff81777d00 <xas_next_entry>
<+853>:	mov    %rax,%rbp
<+856>:	jmp    0xffffffff8177952d <_copy_from_iter+594>
<+861>:	call   0xffffffff811299a3 <__rcu_read_unlock>
<+866>:	add    %rbx,0x8(%r15)

# ITER_DISCARD / default
<+870>:	sub    %rbx,0x18(%r15)
<+874>:	mov    0x60(%rsp),%rax
<+879>:	sub    %gs:0x28,%rax
<+888>:	je     0xffffffff8177965a <_copy_from_iter+895>
<+890>:	call   0xffffffff81d6572c <__stack_chk_fail>
<+895>:	add    $0x68,%rsp
<+899>:	mov    %rbx,%rax
<+902>:	pop    %rbx
<+903>:	pop    %rbp
<+904>:	pop    %r12
<+906>:	pop    %r13
<+908>:	pop    %r14
<+910>:	pop    %r15
<+912>:	jmp    0xffffffff81d728e0 <__x86_return_thunk>
David Laight Aug. 16, 2023, 8:30 a.m. UTC | #2
From: David Howells
> Sent: 14 August 2023 22:40
> 
> 
> >         _copy_from_iter                          inc 0x36e -> 0x395 +0x27
> 
> Here is a disassembly of _copy_from_iter(), unpatched and patched, marked up
> with the different iterator-type branches.  To summarise:
> 
> 		UNPATCHED	PATCHED
> 		START	LEN	START	LEN
> 		=======	=======	=======	=======
> Prologue	0	77	0	76
> UBUF		77	36	76	36
> IOVEC		113	148	112	105
> BVEC		261	159	217	163
> KVEC		420	125	380	116
> XARRAY		545	286	496	374
> DISCARD/Epi	831	42	870	42
> Return		873	-	912	-
> 
> 
> The overall change in the entire file, according to the size(1) utility, is:
>    19855     744       0   20599    5077 build3/lib/iov_iter.o -- before
>    19739     864       0   20603    507b build3/lib/iov_iter.o -- after

It is harder to compare because of some of the random name changes.
The version of the source I found seems to pass priv2 to functions
that don't use it?

Since the functions aren't inlined you get the cost of passing
the parameters.
This seems to affect the common cases.
Is that all left over from a version that passed function pointers
(with the hope they'd be inlined)?
Just directly inlining the simple copies should help.

I rather hope the should_fail_usercopy() and instrument_copy_xxx()
calls are usually either absent or, at most, nops.

This all seems to have a lot fewer options than last time I looked.
Is it worth optimising the KVEC case with a single buffer?

	David

David Howells Aug. 16, 2023, 9:50 a.m. UTC | #3
David Laight <David.Laight@ACULAB.COM> wrote:

> It is harder to compare because of some of the random name changes.

I wouldn't say 'random' exactly, but if you prefer, some of the name changing
can be split out into a separate patch.  The macros are kind of the worst
since they picked up variable names from the callers.

> The version of the source I found seems to pass priv2 to functions
> that don't use it?

That can't be avoided if I convert everything to inline functions and function
pointers - but the optimiser can get rid of it where it can inline the step
function.
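
To illustrate the point (a minimal made-up sketch, not code from the patch):
when the step function is a compile-time constant at the call site of an
__always_inline iterator, the compiler can inline the step and discard the
unused priv2 argument entirely:

	typedef size_t (*step_f)(void *base, size_t progress, size_t len,
				 void *priv, void *priv2);

	static __always_inline
	size_t iterate_one(void *base, size_t len, void *priv, void *priv2,
			   step_f step)
	{
		return len - step(base, 0, len, priv, priv2);
	}

	static size_t memcpy_step(void *base, size_t progress, size_t len,
				  void *priv, void *priv2)
	{
		memcpy(priv + progress, base, len);	/* priv2 never used */
		return 0;
	}

	static size_t copy_one(void *base, size_t len, void *to)
	{
		/* step is constant here, so memcpy_step() gets inlined and
		 * the dead priv2 argument is optimised away. */
		return iterate_one(base, len, to, NULL, memcpy_step);
	}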

I tried passing the iterator to the step functions instead, but that just made
things bigger.  memcpy_from_iter_mc() is interesting to deal with.  I would
prefer to deal with it in the caller so we only do the check once, but that
might mean duplicating the caller.

Actually, ->copy_mc is only set in one place, dump_emit_page() in coredump.c,
and only on a bvec iterator, so I could probably do a special function just for
that case that calls iterate_bvec() rather than iterate_and_advance2(), and
then make _copy_from_iter() just use memcpy_from_iter() and get rid of
iov_iter::copy_mc entirely.
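
A rough sketch of that idea (untested, and assuming a memcpy_from_iter_mc()
step that calls copy_mc_to_kernel() unconditionally instead of checking the
flag):

	static size_t __copy_from_iter_mc(void *addr, size_t bytes,
					  struct iov_iter *i)
	{
		size_t progress;

		if (WARN_ON_ONCE(!iov_iter_is_bvec(i)))
			return 0;
		if (unlikely(i->count < bytes))
			bytes = i->count;
		if (unlikely(!bytes))
			return 0;
		/* Go straight to the bvec iterator; no type switch needed. */
		progress = iterate_bvec(i, bytes, addr, NULL,
					memcpy_from_iter_mc);
		i->count -= progress;
		return progress;
	}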

> Since the functions aren't inlined you get the cost of passing
> the parameters.
> This seems to affect the common cases.

Actually, in v2, the action functions for the common cases are marked
__always_inline and do get fully inlined; the code in some paths actually
ends up slightly smaller.

> Is that all left over from a version that passed function pointers
> (with the hope they'd be inlined?).
> Just directly inlining the simple copies should help.

I did that in v2 for things like memcpy_to_iter() and memcpy_from_iter().

> I rather hope the should_fail_usercopy() and instrument_copy_xxx()
> calls are usually either absent or, at most, nops.

Okay - it's probably worth marking those too, then.

> This all seems to have a lot fewer options than last time I looked.

I'm not sure what you mean by 'a lot fewer options'?

> Is it worth optimising the KVEC case with a single buffer?

You mean an equivalent of UBUF?  Maybe.  There are probably a whole bunch of
netfs places that do single-kvec writes, though I'm trying to convert these
over to bvec arrays, combining them with their data, and MSG_SPLICE_PAGES.

David
David Laight Aug. 16, 2023, 10:17 a.m. UTC | #4
From: David Howells
> Sent: Wednesday, August 16, 2023 10:50 AM
> 
> David Laight <David.Laight@ACULAB.COM> wrote:
> 
> > It is harder to compare because of some of the random name changes.
> 
> I wouldn't say 'random' exactly, but if you prefer, some of the name changing
> can be split out into a separate patch.  The macros are kind of the worst
> since they picked up variable names from the callers.
> 
> > The version of the source I found seems to pass priv2 to functions
> > that don't use it?
> 
> That can't be avoided if I convert everything to inline functions and function
> pointers - but the optimiser can get rid of it where it can inline the step
> function.

AFAICT the IOVEC one was only called directly.

> I tried passing the iterator to the step functions instead, but that just made
> things bigger.  memcpy_from_iter_mc() is interesting to deal with.  I would
> prefer to deal with it in the caller so we only do the check once, but that
> might mean duplicating the caller.

You could try something slightly horrid that the compiler
might optimise for you.
Instead of passing in a function pointer, pass a number.
Then do something like:
#define call_iter(id, ...) \
	(id == x ? fn_x(__VA_ARGS__) : id == y ? fn_y(__VA_ARGS__) : ...)
Constant folding on the inline should kill the function pointer.
You might get away with putting the args on the end.
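
Spelled out a little (illustrative only; the step ids are made up), using the
step functions from the patch:

	enum step_id { STEP_MEMCPY, STEP_MEMCPY_MC };

	static __always_inline
	size_t do_step(enum step_id id, void *base, size_t progress,
		       size_t len, void *priv, void *priv2)
	{
		/* With a compile-time-constant id the ternary chain folds
		 * to a single direct call that can then be inlined. */
		return id == STEP_MEMCPY ?
			memcpy_from_iter(base, progress, len, priv, priv2) :
		       id == STEP_MEMCPY_MC ?
			memcpy_from_iter_mc(base, progress, len, priv, priv2) :
			len;
	}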

...
> > I rather hope the should_fail_usercopy() and instrument_copy_xxx()
> > calls are usually either absent or, at most, nops.
> 
> Okay - it's probably worth marking those too, then.

Thinking about it, I'm sure they are KASAN annotations.
There are few enough calls that I suspect that replicating them
won't affect KASAN (etc.) builds.

> > This all seems to have a lot fewer options than last time I looked.
> 
> I'm not sure what you mean by 'a lot fewer options'?

It might just be ITER_PIPE that has gone.

> > Is it worth optimising the KVEC case with a single buffer?
> 
> You mean an equivalent of UBUF?  Maybe.  There are probably a whole bunch of
> netfs places that do single-kvec writes, though I'm trying to convert these
> over to bvec arrays, combining them with their data, and MSG_SPLICE_PAGES.

I'm thinking of what happens with kernel callers of things
like the socket code - especially for address/option buffers.
Probably io_uring and bpf (and my out of tree drivers!).

Could be the equivalent of UBUF, but checking for KVEC with
a count of 1 wouldn't really add any more cmp/jmp pairs.
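
Something like this, say (hypothetical, mirroring iterate_ubuf() from the
patch; it relies on iter->count already being clamped to what fits in the
one segment):

	static __always_inline
	size_t iterate_kvec_one(struct iov_iter *iter, size_t len, void *priv,
				void *priv2, iov_step_f step)
	{
		void *base = iter->kvec->iov_base + iter->iov_offset;
		size_t remain = step(base, 0, len, priv, priv2);
		size_t progress = len - remain;

		iter->iov_offset += progress;
		return progress;
	}

The kvec branch of iterate_and_advance2() would then pick between this and
iterate_kvec() on iter->nr_segs == 1, which is the cmp you'd pay anyway.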

I've also noticed in the past that some of this code seems
to be optimised for zero length buffers/fragments.
Surely they just need to work?

	David

David Howells Aug. 16, 2023, 11:19 a.m. UTC | #5
David Laight <David.Laight@ACULAB.COM> wrote:

> > That can't be avoided if I convert everything to inline functions and
> > function pointers - but the optimiser can get rid of it where it can
> > inline the step function.
> 
> AFAICT the IOVEC one was only called directly.

I've made some changes that I'll post shortly, and I now get this:

	...
	<+36>:	cmpb   $0x0,0x1(%rdx)	# iter->copy_mc
	...
	<+46>:	je     0xffffffff81779aae <_copy_from_iter+98>
	<+48>:	jmp    0xffffffff81779a87 <_copy_from_iter+59>
	...
	# Handle ->copy_mc == true
	<+59>:	mov    0x38(%rsp),%rax
	<+64>:	sub    %gs:0x28,%rax
	<+73>:	jne    0xffffffff81779db1 <_copy_from_iter+869>
	<+79>:	add    $0x40,%rsp
	<+83>:	pop    %rbx
	<+84>:	pop    %rbp
	<+85>:	pop    %r12
	<+87>:	pop    %r13
	<+89>:	pop    %r14
	<+91>:	pop    %r15
	<+93>:	jmp    0xffffffff81777934 <__copy_from_iter_mc>
	...
	# ITER_UBUF
	<+121>:	mov    (%rdx),%al
	<+123>:	cmp    $0x5,%al
	<+125>:	jne    0xffffffff81779b01 <_copy_from_iter+181>
	...
	<+147>:	call   0xffffffff817777ee <__access_ok>
	<+152>:	test   %al,%al
	<+154>:	je     0xffffffff81779af9 <_copy_from_iter+173>
	<+156>:	nop
	<+157>:	nop
	<+158>:	nop
	<+159>:	mov    %r12,%rdi
	<+162>:	mov    %rdx,%rsi
	<+165>:	rep movsb %ds:(%rsi),%es:(%rdi)
	...
	# ITER_IOVEC
	<+181>:	test   %al,%al
	<+183>:	jne    0xffffffff81779b8d <_copy_from_iter+321>
	...
	<+234>:	call   0xffffffff817777ee <__access_ok>
	<+239>:	test   %al,%al
	<+241>:	je     0xffffffff81779b54 <_copy_from_iter+264>
	<+243>:	nop
	<+244>:	nop
	<+245>:	nop
	<+246>:	lea    (%r12,%r15,1),%rax
	<+250>:	mov    %r8,%rsi
	<+253>:	mov    %rax,%rdi
	<+256>:	rep movsb %ds:(%rsi),%es:(%rdi)
	...
	# ITER_BVEC
	<+321>:	cmp    $0x2,%al
	<+323>:	jne    0xffffffff81779c1f <_copy_from_iter+467>
	...
	<+375>:	call   0xffffffff81777282 <kmap_local_page>
	...
	<+431>:	rep movsb %ds:(%rsi),%es:(%rdi)
	...
	# ITER_KVEC
	<+467>:	cmp    $0x1,%al
	<+469>:	jne    0xffffffff81779c82 <_copy_from_iter+566>
	...
	<+526>:	rep movsb %ds:(%rsi),%es:(%rdi)
	...
	# ITER_XARRAY
	<+566>:	cmp    $0x3,%al
	<+568>:	jne    0xffffffff81779d9d <_copy_from_iter+849>
	...
	<+639>:	call   0xffffffff81126bcf <__rcu_read_lock>
	...
	<+651>:	call   0xffffffff81d5ed97 <xas_find>
	...
	<+764>:	call   0xffffffff817772a7 <kmap_local_folio>
	...
	<+806>:	rep movsb %ds:(%rsi),%es:(%rdi)
	...
	# ITER_DISCARD/default
	<+849>:	sub    %rbx,0x18(%rbp)
	<+853>:	mov    0x38(%rsp),%rax
	<+858>:	sub    %gs:0x28,%rax
	<+867>:	je     0xffffffff81779db6 <_copy_from_iter+874>
	<+869>:	call   0xffffffff81d6578c <__stack_chk_fail>
	<+874>:	add    $0x40,%rsp
	<+878>:	mov    %rbx,%rax
	<+881>:	pop    %rbx
	<+882>:	pop    %rbp
	<+883>:	pop    %r12
	<+885>:	pop    %r13
	<+887>:	pop    %r14
	<+889>:	pop    %r15
	<+891>:	jmp    0xffffffff81d72920 <__x86_return_thunk>

David

Patch

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 424737045b97..d8f915d890aa 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -14,188 +14,260 @@ 
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
 
-/* covers ubuf and kbuf alike */
-#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
-	size_t __maybe_unused off = 0;				\
-	len = n;						\
-	base = __p + i->iov_offset;				\
-	len -= (STEP);						\
-	i->iov_offset += len;					\
-	n = len;						\
-}
-
-/* covers iovec and kvec alike */
-#define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
-	size_t off = 0;						\
-	size_t skip = i->iov_offset;				\
-	do {							\
-		len = min(n, __p->iov_len - skip);		\
-		if (likely(len)) {				\
-			base = __p->iov_base + skip;		\
-			len -= (STEP);				\
-			off += len;				\
-			skip += len;				\
-			n -= len;				\
-			if (skip < __p->iov_len)		\
-				break;				\
-		}						\
-		__p++;						\
-		skip = 0;					\
-	} while (n);						\
-	i->iov_offset = skip;					\
-	n = off;						\
-}
-
-#define iterate_bvec(i, n, base, len, off, p, STEP) {		\
-	size_t off = 0;						\
-	unsigned skip = i->iov_offset;				\
-	while (n) {						\
-		unsigned offset = p->bv_offset + skip;		\
-		unsigned left;					\
-		void *kaddr = kmap_local_page(p->bv_page +	\
-					offset / PAGE_SIZE);	\
-		base = kaddr + offset % PAGE_SIZE;		\
-		len = min(min(n, (size_t)(p->bv_len - skip)),	\
-		     (size_t)(PAGE_SIZE - offset % PAGE_SIZE));	\
-		left = (STEP);					\
-		kunmap_local(kaddr);				\
-		len -= left;					\
-		off += len;					\
-		skip += len;					\
-		if (skip == p->bv_len) {			\
-			skip = 0;				\
-			p++;					\
-		}						\
-		n -= len;					\
-		if (left)					\
-			break;					\
-	}							\
-	i->iov_offset = skip;					\
-	n = off;						\
-}
-
-#define iterate_xarray(i, n, base, len, __off, STEP) {		\
-	__label__ __out;					\
-	size_t __off = 0;					\
-	struct folio *folio;					\
-	loff_t start = i->xarray_start + i->iov_offset;		\
-	pgoff_t index = start / PAGE_SIZE;			\
-	XA_STATE(xas, i->xarray, index);			\
-								\
-	len = PAGE_SIZE - offset_in_page(start);		\
-	rcu_read_lock();					\
-	xas_for_each(&xas, folio, ULONG_MAX) {			\
-		unsigned left;					\
-		size_t offset;					\
-		if (xas_retry(&xas, folio))			\
-			continue;				\
-		if (WARN_ON(xa_is_value(folio)))		\
-			break;					\
-		if (WARN_ON(folio_test_hugetlb(folio)))		\
-			break;					\
-		offset = offset_in_folio(folio, start + __off);	\
-		while (offset < folio_size(folio)) {		\
-			base = kmap_local_folio(folio, offset);	\
-			len = min(n, len);			\
-			left = (STEP);				\
-			kunmap_local(base);			\
-			len -= left;				\
-			__off += len;				\
-			n -= len;				\
-			if (left || n == 0)			\
-				goto __out;			\
-			offset += len;				\
-			len = PAGE_SIZE;			\
-		}						\
-	}							\
-__out:								\
-	rcu_read_unlock();					\
-	i->iov_offset += __off;					\
-	n = __off;						\
-}
-
-#define __iterate_and_advance(i, n, base, len, off, I, K) {	\
-	if (unlikely(i->count < n))				\
-		n = i->count;					\
-	if (likely(n)) {					\
-		if (likely(iter_is_ubuf(i))) {			\
-			void __user *base;			\
-			size_t len;				\
-			iterate_buf(i, n, base, len, off,	\
-						i->ubuf, (I)) 	\
-		} else if (likely(iter_is_iovec(i))) {		\
-			const struct iovec *iov = iter_iov(i);	\
-			void __user *base;			\
-			size_t len;				\
-			iterate_iovec(i, n, base, len, off,	\
-						iov, (I))	\
-			i->nr_segs -= iov - iter_iov(i);	\
-			i->__iov = iov;				\
-		} else if (iov_iter_is_bvec(i)) {		\
-			const struct bio_vec *bvec = i->bvec;	\
-			void *base;				\
-			size_t len;				\
-			iterate_bvec(i, n, base, len, off,	\
-						bvec, (K))	\
-			i->nr_segs -= bvec - i->bvec;		\
-			i->bvec = bvec;				\
-		} else if (iov_iter_is_kvec(i)) {		\
-			const struct kvec *kvec = i->kvec;	\
-			void *base;				\
-			size_t len;				\
-			iterate_iovec(i, n, base, len, off,	\
-						kvec, (K))	\
-			i->nr_segs -= kvec - i->kvec;		\
-			i->kvec = kvec;				\
-		} else if (iov_iter_is_xarray(i)) {		\
-			void *base;				\
-			size_t len;				\
-			iterate_xarray(i, n, base, len, off,	\
-							(K))	\
-		}						\
-		i->count -= n;					\
-	}							\
-}
-#define iterate_and_advance(i, n, base, len, off, I, K) \
-	__iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
-
-static int copyout(void __user *to, const void *from, size_t n)
+typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
+			     void *priv, void *priv2);
+typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
+			      void *priv, void *priv2);
+
+static __always_inline
+size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+		    iov_ustep_f step)
 {
-	if (should_fail_usercopy())
-		return n;
-	if (access_ok(to, n)) {
-		instrument_copy_to_user(to, from, n);
-		n = raw_copy_to_user(to, from, n);
+	void __user *base = iter->ubuf;
+	size_t progress = 0, remain;
+
+	remain = step(base + iter->iov_offset, 0, len, priv, priv2);
+	progress = len - remain;
+	iter->iov_offset += progress;
+	return progress;
+}
+
+static __always_inline
+size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+		     iov_ustep_f step)
+{
+	const struct iovec *p = iter->__iov;
+	size_t progress = 0, skip = iter->iov_offset;
+
+	do {
+		size_t remain, consumed;
+		size_t part = min(len, p->iov_len - skip);
+
+		if (likely(part)) {
+			remain = step(p->iov_base + skip, progress, part, priv, priv2);
+			consumed = part - remain;
+			progress += consumed;
+			skip += consumed;
+			len -= consumed;
+			if (skip < p->iov_len)
+				break;
+		}
+		p++;
+		skip = 0;
+	} while (len);
+
+	iter->nr_segs -= p - iter->__iov;
+	iter->__iov = p;
+	iter->iov_offset = skip;
+	return progress;
+}
+
+static __always_inline
+size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+		    iov_step_f step)
+{
+	const struct kvec *p = iter->kvec;
+	size_t progress = 0, skip = iter->iov_offset;
+
+	do {
+		size_t remain, consumed;
+		size_t part = min(len, p->iov_len - skip);
+
+		if (likely(part)) {
+			remain = step(p->iov_base + skip, progress, part, priv, priv2);
+			consumed = part - remain;
+			progress += consumed;
+			skip += consumed;
+			len -= consumed;
+			if (skip < p->iov_len)
+				break;
+		}
+		p++;
+		skip = 0;
+	} while (len);
+
+	iter->nr_segs -= p - iter->kvec;
+	iter->kvec = p;
+	iter->iov_offset = skip;
+	return progress;
+}
+
+static __always_inline
+size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+		    iov_step_f step)
+{
+	const struct bio_vec *p = iter->bvec;
+	size_t progress = 0, skip = iter->iov_offset;
+
+	do {
+		size_t remain, consumed;
+		size_t offset = p->bv_offset + skip, part;
+		void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);
+
+		part = min3(len,
+			   (size_t)(p->bv_len - skip),
+			   (size_t)(PAGE_SIZE - offset % PAGE_SIZE));
+		remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
+		kunmap_local(kaddr);
+		consumed = part - remain;
+		len -= consumed;
+		progress += consumed;
+		skip += consumed;
+		if (skip >= p->bv_len) {
+			skip = 0;
+			p++;
+		}
+		if (remain)
+			break;
+	} while (len);
+
+	iter->nr_segs -= p - iter->bvec;
+	iter->bvec = p;
+	iter->iov_offset = skip;
+	return progress;
+}
+
+static __always_inline
+size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+		      iov_step_f step)
+{
+	struct folio *folio;
+	size_t progress = 0;
+	loff_t start = iter->xarray_start + iter->iov_offset;
+	pgoff_t index = start / PAGE_SIZE;
+	XA_STATE(xas, iter->xarray, index);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, ULONG_MAX) {
+		size_t remain, consumed, offset, part, flen;
+
+		if (xas_retry(&xas, folio))
+			continue;
+		if (WARN_ON(xa_is_value(folio)))
+			break;
+		if (WARN_ON(folio_test_hugetlb(folio)))
+			break;
+
+		offset = offset_in_folio(folio, start);
+		flen = min(folio_size(folio) - offset, len);
+		start += flen;
+
+		while (flen) {
+			void *base = kmap_local_folio(folio, offset);
+
+			part = min(flen, PAGE_SIZE - offset_in_page(offset));
+			remain = step(base, progress, part, priv, priv2);
+			kunmap_local(base);
+
+			consumed = part - remain;
+			progress += consumed;
+			len -= consumed;
+
+			if (remain || len == 0)
+				goto out;
+			flen -= consumed;
+			offset += consumed;
+		}
 	}
-	return n;
+
+out:
+	rcu_read_unlock();
+	iter->iov_offset += progress;
+	return progress;
 }
 
-static int copyout_nofault(void __user *to, const void *from, size_t n)
+static __always_inline
+size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
+			    void *priv2, iov_ustep_f ustep, iov_step_f step)
 {
-	long res;
+	size_t progress;
 
+	if (unlikely(iter->count < len))
+		len = iter->count;
+	if (unlikely(!len))
+		return 0;
+
+	if (likely(iter_is_ubuf(iter)))
+		progress = iterate_ubuf(iter, len, priv, priv2, ustep);
+	else if (likely(iter_is_iovec(iter)))
+		progress = iterate_iovec(iter, len, priv, priv2, ustep);
+	else if (iov_iter_is_bvec(iter))
+		progress = iterate_bvec(iter, len, priv, priv2, step);
+	else if (iov_iter_is_kvec(iter))
+		progress = iterate_kvec(iter, len, priv, priv2, step);
+	else if (iov_iter_is_xarray(iter))
+		progress = iterate_xarray(iter, len, priv, priv2, step);
+	else
+		progress = len;
+	iter->count -= progress;
+	return progress;
+}
+
+static __always_inline
+size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
+			   iov_ustep_f ustep, iov_step_f step)
+{
+	return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
+}
+
+static size_t copy_to_user_iter(void __user *iter_to, size_t progress,
+				size_t len, void *from, void *priv2)
+{
 	if (should_fail_usercopy())
-		return n;
+		return len;
+	if (access_ok(iter_to, len)) {
+		from += progress;
+		instrument_copy_to_user(iter_to, from, len);
+		len = raw_copy_to_user(iter_to, from, len);
+	}
+	return len;
+}
 
-	res = copy_to_user_nofault(to, from, n);
+static size_t copy_to_user_iter_nofault(void __user *iter_to, size_t progress,
+					size_t len, void *from, void *priv2)
+{
+	ssize_t res;
+
+	if (should_fail_usercopy())
+		return len;
 
-	return res < 0 ? n : res;
+	from += progress;
+	res = copy_to_user_nofault(iter_to, from, len);
+	return res < 0 ? len : res;
 }
 
-static int copyin(void *to, const void __user *from, size_t n)
+static size_t copy_from_user_iter(void __user *iter_from, size_t progress,
+				  size_t len, void *to, void *priv2)
 {
-	size_t res = n;
+	size_t res = len;
 
 	if (should_fail_usercopy())
-		return n;
-	if (access_ok(from, n)) {
-		instrument_copy_from_user_before(to, from, n);
-		res = raw_copy_from_user(to, from, n);
-		instrument_copy_from_user_after(to, from, n, res);
+		return len;
+	if (access_ok(iter_from, len)) {
+		to += progress;
+		instrument_copy_from_user_before(to, iter_from, len);
+		res = raw_copy_from_user(to, iter_from, len);
+		instrument_copy_from_user_after(to, iter_from, len, res);
 	}
 	return res;
 }
 
+static __always_inline
+size_t memcpy_to_iter(void *iter_to, size_t progress,
+		      size_t len, void *from, void *priv2)
+{
+	memcpy(iter_to, from + progress, len);
+	return 0;
+}
+
+static __always_inline
+size_t memcpy_from_iter(void *iter_from, size_t progress,
+			size_t len, void *to, void *priv2)
+{
+	memcpy(to + progress, iter_from, len);
+	return 0;
+}
+
 /*
  * fault_in_iov_iter_readable - fault in iov iterator for reading
  * @i: iterator
@@ -313,23 +385,28 @@  size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 		return 0;
 	if (user_backed_iter(i))
 		might_fault();
-	iterate_and_advance(i, bytes, base, len, off,
-		copyout(base, addr + off, len),
-		memcpy(base, addr + off, len)
-	)
-
-	return bytes;
+	return iterate_and_advance(i, bytes, (void *)addr,
+				   copy_to_user_iter, memcpy_to_iter);
 }
 EXPORT_SYMBOL(_copy_to_iter);
 
 #ifdef CONFIG_ARCH_HAS_COPY_MC
-static int copyout_mc(void __user *to, const void *from, size_t n)
+static size_t copy_to_user_iter_mc(void __user *iter_to, size_t progress,
+				   size_t len, void *from, void *priv2)
 {
-	if (access_ok(to, n)) {
-		instrument_copy_to_user(to, from, n);
-		n = copy_mc_to_user((__force void *) to, from, n);
+	if (access_ok(iter_to, len)) {
+		from += progress;
+		instrument_copy_to_user(iter_to, from, len);
+		len = copy_mc_to_user(iter_to, from, len);
 	}
-	return n;
+	return len;
+}
+
+static __always_inline
+size_t memcpy_to_iter_mc(void *iter_to, size_t progress,
+			 size_t len, void *from, void *priv2)
+{
+	return copy_mc_to_kernel(iter_to, from + progress, len);
 }
 
 /**
@@ -362,22 +439,20 @@  size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 		return 0;
 	if (user_backed_iter(i))
 		might_fault();
-	__iterate_and_advance(i, bytes, base, len, off,
-		copyout_mc(base, addr + off, len),
-		copy_mc_to_kernel(base, addr + off, len)
-	)
-
-	return bytes;
+	return iterate_and_advance(i, bytes, (void *)addr,
+				   copy_to_user_iter_mc, memcpy_to_iter_mc);
 }
 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
 #endif /* CONFIG_ARCH_HAS_COPY_MC */
 
-static void *memcpy_from_iter(struct iov_iter *i, void *to, const void *from,
-				 size_t size)
+static size_t memcpy_from_iter_mc(void *iter_from, size_t progress,
+				  size_t len, void *to, void *priv2)
 {
-	if (iov_iter_is_copy_mc(i))
-		return (void *)copy_mc_to_kernel(to, from, size);
-	return memcpy(to, from, size);
+	struct iov_iter *iter = priv2;
+
+	if (iov_iter_is_copy_mc(iter))
+		return copy_mc_to_kernel(to + progress, iter_from, len);
+	return memcpy_from_iter(iter_from, progress, len, to, priv2);
 }
 
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
@@ -387,30 +462,46 @@  size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 
 	if (user_backed_iter(i))
 		might_fault();
-	iterate_and_advance(i, bytes, base, len, off,
-		copyin(addr + off, base, len),
-		memcpy_from_iter(i, addr + off, base, len)
-	)
-
-	return bytes;
+	return iterate_and_advance2(i, bytes, addr, i,
+				    copy_from_user_iter,
+				    memcpy_from_iter_mc);
 }
 EXPORT_SYMBOL(_copy_from_iter);
 
+static __always_inline
+size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress,
+				   size_t len, void *to, void *priv2)
+{
+	return __copy_from_user_inatomic_nocache(to + progress, iter_from, len);
+}
+
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
 
-	iterate_and_advance(i, bytes, base, len, off,
-		__copy_from_user_inatomic_nocache(addr + off, base, len),
-		memcpy(addr + off, base, len)
-	)
-
-	return bytes;
+	return iterate_and_advance(i, bytes, addr,
+				   copy_from_user_iter_nocache,
+				   memcpy_from_iter);
 }
 EXPORT_SYMBOL(_copy_from_iter_nocache);
 
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+static __always_inline
+size_t copy_from_user_iter_flushcache(void __user *iter_from, size_t progress,
+				      size_t len, void *to, void *priv2)
+{
+	return __copy_from_user_flushcache(to + progress, iter_from, len);
+}
+
+static __always_inline
+size_t memcpy_from_iter_flushcache(void *iter_from, size_t progress,
+				   size_t len, void *to, void *priv2)
+{
+	memcpy_flushcache(to + progress, iter_from, len);
+	return 0;
+}
+
 /**
  * _copy_from_iter_flushcache - write destination through cpu cache
  * @addr: destination kernel address
@@ -432,12 +523,9 @@  size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
 
-	iterate_and_advance(i, bytes, base, len, off,
-		__copy_from_user_flushcache(addr + off, base, len),
-		memcpy_flushcache(addr + off, base, len)
-	)
-
-	return bytes;
+	return iterate_and_advance(i, bytes, addr,
+				   copy_from_user_iter_flushcache,
+				   memcpy_from_iter_flushcache);
 }
 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
 #endif
@@ -509,10 +597,9 @@  size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte
 		void *kaddr = kmap_local_page(page);
 		size_t n = min(bytes, (size_t)PAGE_SIZE - offset);
 
-		iterate_and_advance(i, n, base, len, off,
-			copyout_nofault(base, kaddr + offset + off, len),
-			memcpy(base, kaddr + offset + off, len)
-		)
+		n = iterate_and_advance(i, n, kaddr + offset,
+					copy_to_user_iter_nofault,
+					memcpy_to_iter);
 		kunmap_local(kaddr);
 		res += n;
 		bytes -= n;
@@ -555,14 +642,25 @@  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
-size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+static __always_inline
+size_t zero_to_user_iter(void __user *iter_to, size_t progress,
+			 size_t len, void *priv, void *priv2)
 {
-	iterate_and_advance(i, bytes, base, len, count,
-		clear_user(base, len),
-		memset(base, 0, len)
-	)
+	return clear_user(iter_to, len);
+}
 
-	return bytes;
+static __always_inline
+size_t zero_to_iter(void *iter_to, size_t progress,
+		    size_t len, void *priv, void *priv2)
+{
+	memset(iter_to, 0, len);
+	return 0;
+}
+
+size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+{
+	return iterate_and_advance(i, bytes, NULL,
+				   zero_to_user_iter, zero_to_iter);
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
@@ -587,10 +685,9 @@  size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
 		}
 
 		p = kmap_atomic(page) + offset;
-		iterate_and_advance(i, n, base, len, off,
-			copyin(p + off, base, len),
-			memcpy_from_iter(i, p + off, base, len)
-		)
+		n = iterate_and_advance2(i, n, p, i,
+					 copy_from_user_iter,
+					 memcpy_from_iter_mc);
 		kunmap_atomic(p);
 		copied += n;
 		offset += n;
@@ -1181,32 +1278,64 @@  ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc2);
 
+static __always_inline
+size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
+				size_t len, void *to, void *priv2)
+{
+	__wsum next, *csum = priv2;
+
+	next = csum_and_copy_from_user(iter_from, to + progress, len);
+	*csum = csum_block_add(*csum, next, progress);
+	return next ? 0 : len;
+}
+
+static __always_inline
+size_t memcpy_from_iter_csum(void *iter_from, size_t progress,
+			     size_t len, void *to, void *priv2)
+{
+	__wsum *csum = priv2;
+
+	*csum = csum_and_memcpy(to + progress, iter_from, len, *csum, progress);
+	return 0;
+}
+
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 			       struct iov_iter *i)
 {
-	__wsum sum, next;
-	sum = *csum;
 	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
-
-	iterate_and_advance(i, bytes, base, len, off, ({
-		next = csum_and_copy_from_user(base, addr + off, len);
-		sum = csum_block_add(sum, next, off);
-		next ? 0 : len;
-	}), ({
-		sum = csum_and_memcpy(addr + off, base, len, sum, off);
-	})
-	)
-	*csum = sum;
-	return bytes;
+	return iterate_and_advance2(i, bytes, addr, csum,
+				    copy_from_user_iter_csum,
+				    memcpy_from_iter_csum);
 }
 EXPORT_SYMBOL(csum_and_copy_from_iter);
 
+static __always_inline
+size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress,
+			      size_t len, void *from, void *priv2)
+{
+	__wsum next, *csum = priv2;
+
+	next = csum_and_copy_to_user(from + progress, iter_to, len);
+	*csum = csum_block_add(*csum, next, progress);
+	return next ? 0 : len;
+}
+
+static __always_inline
+size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
+			   size_t len, void *from, void *priv2)
+{
+	__wsum *csum = priv2;
+
+	*csum = csum_and_memcpy(iter_to, from + progress, len, *csum, progress);
+	return 0;
+}
+
 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
 			     struct iov_iter *i)
 {
 	struct csum_state *csstate = _csstate;
-	__wsum sum, next;
+	__wsum sum;
 
 	if (WARN_ON_ONCE(i->data_source))
 		return 0;
@@ -1220,14 +1349,10 @@  size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
 	}
 
 	sum = csum_shift(csstate->csum, csstate->off);
-	iterate_and_advance(i, bytes, base, len, off, ({
-		next = csum_and_copy_to_user(addr + off, base, len);
-		sum = csum_block_add(sum, next, off);
-		next ? 0 : len;
-	}), ({
-		sum = csum_and_memcpy(base, addr + off, len, sum, off);
-	})
-	)
+
+	bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum,
+				     copy_to_user_iter_csum,
+				     memcpy_to_iter_csum);
 	csstate->csum = csum_shift(sum, csstate->off);
 	csstate->off += bytes;
 	return bytes;