diff mbox series

[1/4] module: Add module_subinit{_noexit} and module_subeixt helper macros

Message ID 20240723083239.41533-2-youling.tang@linux.dev (mailing list archive)
State Handled Elsewhere
Headers show
Series Add module_subinit{_noexit} and module_subeixt helper macros | expand

Commit Message

Youling Tang July 23, 2024, 8:32 a.m. UTC
From: Youling Tang <tangyouling@kylinos.cn>

In theory init/exit should match their sequence, thus normally they should
look like this:
-------------------------+------------------------
    init_A();            |
    init_B();            |
    init_C();            |
                         |   exit_C();
                         |   exit_B();
                         |   exit_A();

Providing module_subinit{_noexit} and module_subeixt helps macros ensure
that modules init/exit match their order, while also simplifying the code.

The three macros are defined as follows:
- module_subinit(initfn, exitfn,rollback)
- module_subinit_noexit(initfn, rollback)
- module_subexit(rollback)

`initfn` is the initialization function and `exitfn` is the corresponding
exit function.

Signed-off-by: Youling Tang <tangyouling@kylinos.cn>
---
 include/asm-generic/vmlinux.lds.h |  5 +++
 include/linux/init.h              | 62 ++++++++++++++++++++++++++++++-
 include/linux/module.h            | 22 +++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)

Comments

Mika Penttilä July 23, 2024, 9:58 a.m. UTC | #1
On 7/23/24 11:32, Youling Tang wrote:
> From: Youling Tang <tangyouling@kylinos.cn>
>
> In theory init/exit should match their sequence, thus normally they should
> look like this:
> -------------------------+------------------------
>     init_A();            |
>     init_B();            |
>     init_C();            |
>                          |   exit_C();
>                          |   exit_B();
>                          |   exit_A();
>
> Providing module_subinit{_noexit} and module_subeixt helps macros ensure
> that modules init/exit match their order, while also simplifying the code.
>
> The three macros are defined as follows:
> - module_subinit(initfn, exitfn,rollback)
> - module_subinit_noexit(initfn, rollback)
> - module_subexit(rollback)
>
> `initfn` is the initialization function and `exitfn` is the corresponding
> exit function.
>
> Signed-off-by: Youling Tang <tangyouling@kylinos.cn>
> ---
>  include/asm-generic/vmlinux.lds.h |  5 +++
>  include/linux/init.h              | 62 ++++++++++++++++++++++++++++++-
>  include/linux/module.h            | 22 +++++++++++
>  3 files changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 677315e51e54..48ccac7c6448 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -927,6 +927,10 @@
>  		INIT_CALLS_LEVEL(7)					\
>  		__initcall_end = .;
>  
> +#define SUBINIT_CALL							\
> +		*(.subinitcall.init)					\
> +		*(.subexitcall.exit)
> +
>  #define CON_INITCALL							\
>  	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
>  
> @@ -1155,6 +1159,7 @@
>  		INIT_DATA						\
>  		INIT_SETUP(initsetup_align)				\
>  		INIT_CALLS						\
> +		SUBINIT_CALL						\
>  		CON_INITCALL						\
>  		INIT_RAM_FS						\
>  	}
> diff --git a/include/linux/init.h b/include/linux/init.h
> index ee1309473bc6..e8689ff2cb6c 100644
> --- a/include/linux/init.h
> +++ b/include/linux/init.h
> @@ -55,6 +55,9 @@
>  #define __exitdata	__section(".exit.data")
>  #define __exit_call	__used __section(".exitcall.exit")
>  
> +#define __subinit_call	__used __section(".subinitcall.init")
> +#define __subexit_call	__used __section(".subexitcall.exit")
> +
>  /*
>   * modpost check for section mismatches during the kernel build.
>   * A section mismatch happens when there are references from a
> @@ -115,6 +118,9 @@
>  typedef int (*initcall_t)(void);
>  typedef void (*exitcall_t)(void);
>  
> +typedef int (*subinitcall_t)(void);
> +typedef void (*subexitcall_t)(void);
> +
>  #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
>  typedef int initcall_entry_t;
>  
> @@ -183,7 +189,61 @@ extern struct module __this_module;
>  #endif
>  
>  #endif
> -  
> +
> +#ifndef __ASSEMBLY__
> +struct subexitcall_rollback {
> +	/*
> +	 * Records the address of the first sub-initialization function in the
> +	 * ".subexitcall.exit" section
> +	 */
> +	unsigned long first_addr;
> +	int ncalls;
> +};
> +
> +static inline void __subexitcall_rollback(struct subexitcall_rollback *r)
> +{
> +	unsigned long addr = r->first_addr - sizeof(r->first_addr) * (r->ncalls - 1);
> +
> +	for (; r->ncalls--; addr += sizeof(r->first_addr)) {
> +		unsigned long *tmp = (void *)addr;
> +		subexitcall_t fn = (subexitcall_t)*tmp;
> +		fn();
> +	}
> +}

How does this guarantee the exit calls match sequence? Are you assuming
linker puts exit functions in reverse order?


--Mika
Christoph Hellwig July 23, 2024, 2:33 p.m. UTC | #2
On Tue, Jul 23, 2024 at 04:32:36PM +0800, Youling Tang wrote:
> Providing module_subinit{_noexit} and module_subeixt helps macros ensure
> that modules init/exit match their order, while also simplifying the code.
> 
> The three macros are defined as follows:
> - module_subinit(initfn, exitfn,rollback)
> - module_subinit_noexit(initfn, rollback)
> - module_subexit(rollback)
> 
> `initfn` is the initialization function and `exitfn` is the corresponding
> exit function.

I find the interface a little confusing.  What I would have expected
is to:

 - have the module_subinit call at file scope instead of in the
   module_init helper, similar to module_init/module_exit
 - thus keep the rollback state explicitly in the module structure or
   similar so that the driver itself doesn't need to care about at
   all, and thus remove the need for the module_subexit call.
Youling Tang July 24, 2024, 1:20 a.m. UTC | #3
Hi, Mika

On 23/07/2024 17:58, Mika Penttilä wrote:
> On 7/23/24 11:32, Youling Tang wrote:
>> From: Youling Tang <tangyouling@kylinos.cn>
>>
>> In theory init/exit should match their sequence, thus normally they should
>> look like this:
>> -------------------------+------------------------
>>      init_A();            |
>>      init_B();            |
>>      init_C();            |
>>                           |   exit_C();
>>                           |   exit_B();
>>                           |   exit_A();
>>
>> Providing module_subinit{_noexit} and module_subeixt helps macros ensure
>> that modules init/exit match their order, while also simplifying the code.
>>
>> The three macros are defined as follows:
>> - module_subinit(initfn, exitfn,rollback)
>> - module_subinit_noexit(initfn, rollback)
>> - module_subexit(rollback)
>>
>> `initfn` is the initialization function and `exitfn` is the corresponding
>> exit function.
>>
>> Signed-off-by: Youling Tang <tangyouling@kylinos.cn>
>> ---
>>   include/asm-generic/vmlinux.lds.h |  5 +++
>>   include/linux/init.h              | 62 ++++++++++++++++++++++++++++++-
>>   include/linux/module.h            | 22 +++++++++++
>>   3 files changed, 88 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
>> index 677315e51e54..48ccac7c6448 100644
>> --- a/include/asm-generic/vmlinux.lds.h
>> +++ b/include/asm-generic/vmlinux.lds.h
>> @@ -927,6 +927,10 @@
>>   		INIT_CALLS_LEVEL(7)					\
>>   		__initcall_end = .;
>>   
>> +#define SUBINIT_CALL							\
>> +		*(.subinitcall.init)					\
>> +		*(.subexitcall.exit)
>> +
>>   #define CON_INITCALL							\
>>   	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
>>   
>> @@ -1155,6 +1159,7 @@
>>   		INIT_DATA						\
>>   		INIT_SETUP(initsetup_align)				\
>>   		INIT_CALLS						\
>> +		SUBINIT_CALL						\
>>   		CON_INITCALL						\
>>   		INIT_RAM_FS						\
>>   	}
>> diff --git a/include/linux/init.h b/include/linux/init.h
>> index ee1309473bc6..e8689ff2cb6c 100644
>> --- a/include/linux/init.h
>> +++ b/include/linux/init.h
>> @@ -55,6 +55,9 @@
>>   #define __exitdata	__section(".exit.data")
>>   #define __exit_call	__used __section(".exitcall.exit")
>>   
>> +#define __subinit_call	__used __section(".subinitcall.init")
>> +#define __subexit_call	__used __section(".subexitcall.exit")
>> +
>>   /*
>>    * modpost check for section mismatches during the kernel build.
>>    * A section mismatch happens when there are references from a
>> @@ -115,6 +118,9 @@
>>   typedef int (*initcall_t)(void);
>>   typedef void (*exitcall_t)(void);
>>   
>> +typedef int (*subinitcall_t)(void);
>> +typedef void (*subexitcall_t)(void);
>> +
>>   #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
>>   typedef int initcall_entry_t;
>>   
>> @@ -183,7 +189,61 @@ extern struct module __this_module;
>>   #endif
>>   
>>   #endif
>> -
>> +
>> +#ifndef __ASSEMBLY__
>> +struct subexitcall_rollback {
>> +	/*
>> +	 * Records the address of the first sub-initialization function in the
>> +	 * ".subexitcall.exit" section
>> +	 */
>> +	unsigned long first_addr;
>> +	int ncalls;
>> +};
>> +
>> +static inline void __subexitcall_rollback(struct subexitcall_rollback *r)
>> +{
>> +	unsigned long addr = r->first_addr - sizeof(r->first_addr) * (r->ncalls - 1);
>> +
>> +	for (; r->ncalls--; addr += sizeof(r->first_addr)) {
>> +		unsigned long *tmp = (void *)addr;
>> +		subexitcall_t fn = (subexitcall_t)*tmp;
>> +		fn();
>> +	}
>> +}
> How does this guarantee the exit calls match sequence? Are you assuming
> linker puts exit functions in reverse order?
Take btrfs for example:
Initialize the function sequentially in init_btrfs_fs() using
module_subinit{_noexit}, storing the corresponding function addresses
in the specified ".subinitcall.init" and ".subexitcall.exit" sections.

Using gcc to compile btrfs to.ko, the view section contains the following:
```
$ objdump -d -j ".subinitcall.init" fs/btrfs/super.o

fs/btrfs/super.o:     file format elf64-x86-64


Disassembly of section .subinitcall.init:

0000000000000000 <__subinitcall_register_btrfs.0>:
     ...

0000000000000008 <__subinitcall_btrfs_run_sanity_tests.2>:
     ...

0000000000000010 <__subinitcall_btrfs_print_mod_info.3>:
     ...

0000000000000018 <__subinitcall_btrfs_interface_init.4>:
     ...

0000000000000020 <__subinitcall_btrfs_prelim_ref_init.6>:
     ...

0000000000000028 <__subinitcall_btrfs_delayed_ref_init.8>:
     ...

0000000000000030 <__subinitcall_btrfs_auto_defrag_init.10>:
     ...

0000000000000038 <__subinitcall_btrfs_delayed_inode_init.12>:
     ...

0000000000000040 <__subinitcall_ordered_data_init.14>:
     ...

0000000000000048 <__subinitcall_extent_map_init.16>:
     ...

0000000000000050 <__subinitcall_btrfs_bioset_init.18>:
     ...

0000000000000058 <__subinitcall_extent_buffer_init_cachep.20>:
     ...

0000000000000060 <__subinitcall_extent_state_init_cachep.22>:
     ...

0000000000000068 <__subinitcall_btrfs_free_space_init.24>:
     ...

0000000000000070 <__subinitcall_btrfs_ctree_init.26>:
     ...

0000000000000078 <__subinitcall_btrfs_transaction_init.28>:
     ...

0000000000000080 <__subinitcall_btrfs_init_dio.30>:
     ...

0000000000000088 <__subinitcall_btrfs_init_cachep.32>:
     ...

0000000000000090 <__subinitcall_btrfs_init_compress.34>:
     ...

0000000000000098 <__subinitcall_btrfs_init_sysfs.36>:
     ...

00000000000000a0 <__subinitcall_btrfs_props_init.38>:
     ...

```

```
$ objdump -d -j ".subexitcall.exit" fs/btrfs/super.o

fs/btrfs/super.o:     file format elf64-x86-64


Disassembly of section .subexitcall.exit:

0000000000000000 <__subexitcall_unregister_btrfs.1>:
     ...

0000000000000008 <__subexitcall_btrfs_interface_exit.5>:
     ...

0000000000000010 <__subexitcall_btrfs_prelim_ref_exit.7>:
     ...

0000000000000018 <__subexitcall_btrfs_delayed_ref_exit.9>:
     ...

0000000000000020 <__subexitcall_btrfs_auto_defrag_exit.11>:
     ...

0000000000000028 <__subexitcall_btrfs_delayed_inode_exit.13>:
     ...

0000000000000030 <__subexitcall_ordered_data_exit.15>:
     ...

0000000000000038 <__subexitcall_extent_map_exit.17>:
     ...

0000000000000040 <__subexitcall_btrfs_bioset_exit.19>:
     ...

0000000000000048 <__subexitcall_extent_buffer_free_cachep.21>:
     ...

0000000000000050 <__subexitcall_extent_state_free_cachep.23>:
     ...

0000000000000058 <__subexitcall_btrfs_free_space_exit.25>:
     ...

0000000000000060 <__subexitcall_btrfs_ctree_exit.27>:
     ...

0000000000000068 <__subexitcall_btrfs_transaction_exit.29>:
     ...

0000000000000070 <__subexitcall_btrfs_destroy_dio.31>:
     ...

0000000000000078 <__subexitcall_btrfs_destroy_cachep.33>:
     ...

0000000000000080 <__subexitcall_btrfs_exit_compress.35>:
     ...

0000000000000088 <__subexitcall_btrfs_exit_sysfs.37>:
     ...


```

 From the above, we can see that the compiler stores the init/exit function
in reverse order.

Thanks,
Youling.
Youling Tang July 24, 2024, 1:57 a.m. UTC | #4
Hi, Christoph

On 23/07/2024 22:33, Christoph Hellwig wrote:
> On Tue, Jul 23, 2024 at 04:32:36PM +0800, Youling Tang wrote:
>> Providing module_subinit{_noexit} and module_subeixt helps macros ensure
>> that modules init/exit match their order, while also simplifying the code.
>>
>> The three macros are defined as follows:
>> - module_subinit(initfn, exitfn,rollback)
>> - module_subinit_noexit(initfn, rollback)
>> - module_subexit(rollback)
>>
>> `initfn` is the initialization function and `exitfn` is the corresponding
>> exit function.
> I find the interface a little confusing.  What I would have expected
> is to:
>
>   - have the module_subinit call at file scope instead of in the
>     module_init helper, similar to module_init/module_exit
>   - thus keep the rollback state explicitly in the module structure or
>     similar so that the driver itself doesn't need to care about at
>     all, and thus remove the need for the module_subexit call.
module_init(initfn)/module_exit(exitfn) has two definitions (via MODULE):
- buindin: uses do_initcalls() to iterate over the contents of the specified
   section and executes all initfn functions in the section in the order in
   which they are stored (exitfn is not required).

- ko: run do_init_module(mod)->do_one_initcall(mod->init) to execute initfn
   of the specified module.

If we change module_subinit to something like this, not called in
module_init,
```
static int init_a(void)
{
     ...
     return 0;
}
static void exit_a(void)
{
     ...
}
subinitcall(init_a, exit_a);

static int init_b(void)
{
     ...
     return 0;
}
static void exit_b(void)
{
     ...
}
subinitcall(init_b, exit_b);
```

Not only do we want to ensure that exit is executed in reverse order of
init, but we also want to ensure the order of init.

This does not guarantee the order in which init will be executed (although
the init/exit order will remain the same)

Tanks,
Youling.
Christoph Hellwig July 24, 2024, 3:43 p.m. UTC | #5
On Wed, Jul 24, 2024 at 09:57:05AM +0800, Youling Tang wrote:
> module_init(initfn)/module_exit(exitfn) has two definitions (via MODULE):
> - buindin: uses do_initcalls() to iterate over the contents of the specified
>   section and executes all initfn functions in the section in the order in
>   which they are stored (exitfn is not required).
> 
> - ko: run do_init_module(mod)->do_one_initcall(mod->init) to execute initfn
>   of the specified module.
> 
> If we change module_subinit to something like this, not called in
> module_init,

> Not only do we want to ensure that exit is executed in reverse order of
> init, but we also want to ensure the order of init.

Yes.

> This does not guarantee the order in which init will be executed (although
> the init/exit order will remain the same)

Hmm, so the normal built-in initcalls depend on the link order, but when
they are in the same file, the compiler can reorder them before we even
get to the linker.

I wonder what a good syntax would be to still avoid the boilerplate
code.  We'd probably need one macro to actually define the init/exit
table in a single statement so that it can't be reordered, but that
would lose the ability to actually declare the module subinit/exit
handlers in multiple files, which really is the biggest win of this
scheme as it allows to keep the functions static instead of exposing
them to other compilation units.

And in fact even in your three converted file systems, most
subinit/exit handler are in separate files, so maybe instead
enforcing that there is just one per file and slightly refactoring
the code so that this is the case might be the best option?
Youling Tang July 25, 2024, 3:01 a.m. UTC | #6
On 24/07/2024 23:43, Christoph Hellwig wrote:
> On Wed, Jul 24, 2024 at 09:57:05AM +0800, Youling Tang wrote:
>> module_init(initfn)/module_exit(exitfn) has two definitions (via MODULE):
>> - buindin: uses do_initcalls() to iterate over the contents of the specified
>>    section and executes all initfn functions in the section in the order in
>>    which they are stored (exitfn is not required).
>>
>> - ko: run do_init_module(mod)->do_one_initcall(mod->init) to execute initfn
>>    of the specified module.
>>
>> If we change module_subinit to something like this, not called in
>> module_init,
>> Not only do we want to ensure that exit is executed in reverse order of
>> init, but we also want to ensure the order of init.
> Yes.
>
>> This does not guarantee the order in which init will be executed (although
>> the init/exit order will remain the same)
> Hmm, so the normal built-in initcalls depend on the link order, but when
> they are in the same file, the compiler can reorder them before we even
> get to the linker.
>
> I wonder what a good syntax would be to still avoid the boilerplate
> code.  We'd probably need one macro to actually define the init/exit
> table in a single statement so that it can't be reordered, but that
> would lose the ability to actually declare the module subinit/exit
> handlers in multiple files, which really is the biggest win of this
> scheme as it allows to keep the functions static instead of exposing
> them to other compilation units.
>
> And in fact even in your three converted file systems, most
> subinit/exit handler are in separate files, so maybe instead
> enforcing that there is just one per file and slightly refactoring
> the code so that this is the case might be the best option?
- It doesn't feel good to have only one subinit/exit in a file.
   Assuming that there is only one file in each file, how do we
   ensure that the files are linked in order?(Is it sorted by *.o
   in the Makefile?)

- Even if the order of each init is linked correctly, then the
   runtime will be iterated through the .subinitcall.init section,
   which executes each initfn in sequence (similar to do_initcalls),
   which means that no other code can be inserted between each subinit.


If module_subinit is called in module_init, other code can be inserted
between subinit, similar to the following:

```
static int __init init_example(void)
{
     module_subinit(inita, exita);

     otherthing...

     module_subinit(initb, exitb);

     return 0;
}

module_init(init_example);
```

IMHO, module_subinit() might be better called in module_init().

Thanks,
Youling.
Christoph Hellwig July 25, 2024, 2:39 p.m. UTC | #7
On Thu, Jul 25, 2024 at 11:01:33AM +0800, Youling Tang wrote:
> - It doesn't feel good to have only one subinit/exit in a file.
>   Assuming that there is only one file in each file, how do we
>   ensure that the files are linked in order?(Is it sorted by *.o
>   in the Makefile?)

Yes, link order already matterns for initialization order for built-in
code, so this is a well known concept.

> - Even if the order of each init is linked correctly, then the
>   runtime will be iterated through the .subinitcall.init section,
>   which executes each initfn in sequence (similar to do_initcalls),
>   which means that no other code can be inserted between each subinit.

I don't understand this comment.  What do you mean with no other
code could be inserted?

> If module_subinit is called in module_init, other code can be inserted
> between subinit, similar to the following:
> 
> ```
> static int __init init_example(void)
> {
>     module_subinit(inita, exita);
> 
>     otherthing...
> 
>     module_subinit(initb, exitb);
> 
>     return 0;
> }

Yikes.  That's really not the point of having init calls, but just
really, really convoluted control flow.

> module_init(init_example);
> ```
> 
> IMHO, module_subinit() might be better called in module_init().

I strongly disagree.
Arnd Bergmann July 25, 2024, 3:30 p.m. UTC | #8
On Thu, Jul 25, 2024, at 16:39, Christoph Hellwig wrote:
> On Thu, Jul 25, 2024 at 11:01:33AM +0800, Youling Tang wrote:
>> - It doesn't feel good to have only one subinit/exit in a file.
>>   Assuming that there is only one file in each file, how do we
>>   ensure that the files are linked in order?(Is it sorted by *.o
>>   in the Makefile?)
>
> Yes, link order already matterns for initialization order for built-in
> code, so this is a well known concept.

Note: I removed the old way of entering a module a few
years ago, which allowed simply defining a function called
init_module(). The last one of these was a07d8ecf6b39
("ethernet: isa: convert to module_init/module_exit").

Now I think we could just make the module_init() macro
do the same thing as a built-in initcall() and put
an entry in a special section, to let you have multiple
entry points in a loadable module.

There are still at least two problems though:

- while link order is defined between files in a module,
  I don't think there is any guarantee for the order between
  two initcalls of the same level within a single file.

- For built-in code we don't have to worry about matching
  the order of the exit calls since they don't exist there.
  As I understand, the interesting part of this patch
  series is about making sure the order matches between
  init and exit, so there still needs to be a way to
  express a pair of such calls.

     Arnd
Christoph Hellwig July 25, 2024, 3:34 p.m. UTC | #9
On Thu, Jul 25, 2024 at 05:30:58PM +0200, Arnd Bergmann wrote:
> Now I think we could just make the module_init() macro
> do the same thing as a built-in initcall() and put
> an entry in a special section, to let you have multiple
> entry points in a loadable module.
> 
> There are still at least two problems though:
> 
> - while link order is defined between files in a module,
>   I don't think there is any guarantee for the order between
>   two initcalls of the same level within a single file.

I think the sanest answer is to only allow one per file.  If you
are in the same file anyway calling one function from the other
is not a big burden.  It really is when they are spread over files
when it is annoying, and the three examples show that pretty
clearly.

> - For built-in code we don't have to worry about matching
>   the order of the exit calls since they don't exist there.
>   As I understand, the interesting part of this patch
>   series is about making sure the order matches between
>   init and exit, so there still needs to be a way to
>   express a pair of such calls.

That's why you want a single macro to define the init and exit
callbacks, so that the order can be matched up and so that
error unwinding can use the relative position easily.
Goffredo Baroncelli July 25, 2024, 5:14 p.m. UTC | #10
On 25/07/2024 17.34, Christoph Hellwig wrote:
> On Thu, Jul 25, 2024 at 05:30:58PM +0200, Arnd Bergmann wrote:
>> Now I think we could just make the module_init() macro
>> do the same thing as a built-in initcall() and put
>> an entry in a special section, to let you have multiple
>> entry points in a loadable module.
>>
>> There are still at least two problems though:
>>
>> - while link order is defined between files in a module,
>>    I don't think there is any guarantee for the order between
>>    two initcalls of the same level within a single file.
> 
> I think the sanest answer is to only allow one per file.  If you
> are in the same file anyway calling one function from the other
> is not a big burden.  It really is when they are spread over files
> when it is annoying, and the three examples show that pretty
> clearly.
> 
>> - For built-in code we don't have to worry about matching
>>    the order of the exit calls since they don't exist there.
>>    As I understand, the interesting part of this patch
>>    series is about making sure the order matches between
>>    init and exit, so there still needs to be a way to
>>    express a pair of such calls.
> 
> That's why you want a single macro to define the init and exit
> callbacks, so that the order can be matched up and so that
> error unwinding can use the relative position easily.
> 

Instead of relying to the "expected" order of the compiler/linker,
why doesn't manage the chain explicitly ? Something like:

struct __subexitcall_node {
	void (*exitfn)(void);
	struct subexitcall_node  *next;

static inline void __subexitcall_rollback(struct __subexitcall_node *p)
{
	while (p) {
		p->exitfn();
		p = p->next;
	}
}

#define __subinitcall_noexit(initfn, rollback)					\
do {										\
	int _ret;								\
	_ret = initfn();							\
	if (_ret < 0) {								\
		__subexitcall_rollback(rollback);				\
		return _ret;							\
	}									\
} while (0)

#define __subinitcall(initfn, exitfn, rollback)						\
do {											\
	static subexitcall_node node = {exitfn, rollback->head};	                \
	__subinitcall_noexit(initfn, rollback);						\
	rollback = &node;
} while (0)


#define MODULE_SUBINIT_INIT(rollback) \
	struct __subexitcall_node	*rollback = NULL

#define MODULE_SUBINIT_CALL(initfn, exitfn, rollback) \
	__subinitcall(initfn, exitfn, rollback)

#define MODULE_SUBINIT_CALL_NOEXIT(initfn, rollback) \
	__subinitcall_noexit(initfn, rollback)

#define MODULE_SUBEXIT(rollback) 		\
do {						\
	__subexitcall_rollback(rollback);	\
	rollback = NULL;			\
} while(0)

usage:

	MODULE_SUBINIT_INIT(rollback);

	
	MODULE_SUBINIT_CALL(init_a, exit_a, rollback);
	MODULE_SUBINIT_CALL(init_b, exit_b, rollback);
	MODULE_SUBINIT_CALL_NOEXIT(init_c, rollback);


	MODULE_SUBEXIT(rollback);

this would cost +1 pointer for each function. But this would save from situation like

	r = init_a();
	if (r)
		init_b();
	init_c();
Christoph Hellwig July 25, 2024, 7:46 p.m. UTC | #11
On Thu, Jul 25, 2024 at 07:14:14PM +0200, Goffredo Baroncelli wrote:
> Instead of relying to the "expected" order of the compiler/linker,
> why doesn't manage the chain explicitly ? Something like:

Because that doesn't actually solve anything over simple direct calls
as you still need the symbols to be global?

As an example here is a very hacky patch that just compiles with unused
variable warnings and doesn't work at all to show how the distributed
module_subinit would improve ext4, the file system with the least
subinit calls of the three converted in the series, and without any
extra cleanups like removing now unneded includes or moving more stuff
into subinits if they can remain entirely static that way:

 11 files changed, 61 insertions(+), 114 deletions(-)

diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 87ee3a17bd29c9..87f0ccd06fc069 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,7 +29,7 @@ struct ext4_system_zone {
 
 static struct kmem_cache *ext4_system_zone_cachep;
 
-int __init ext4_init_system_zone(void)
+static int __init ext4_init_system_zone(void)
 {
 	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
 	if (ext4_system_zone_cachep == NULL)
@@ -37,11 +37,12 @@ int __init ext4_init_system_zone(void)
 	return 0;
 }
 
-void ext4_exit_system_zone(void)
+static void ext4_exit_system_zone(void)
 {
 	rcu_barrier();
 	kmem_cache_destroy(ext4_system_zone_cachep);
 }
+module_subinit(ext4_init_system_zone, ext4_exit_system_zone);
 
 static inline int can_merge(struct ext4_system_zone *entry1,
 		     struct ext4_system_zone *entry2)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 08acd152261ed8..db81f18cdc3266 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2915,8 +2915,6 @@ void ext4_fc_del(struct inode *inode);
 bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
 void ext4_fc_replay_cleanup(struct super_block *sb);
 int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
-int __init ext4_fc_init_dentry_cache(void);
-void ext4_fc_destroy_dentry_cache(void);
 int ext4_fc_record_regions(struct super_block *sb, int ino,
 			   ext4_lblk_t lblk, ext4_fsblk_t pblk,
 			   int len, int replay);
@@ -2930,8 +2928,6 @@ extern void ext4_mb_release(struct super_block *);
 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
 				struct ext4_allocation_request *, int *);
 extern void ext4_discard_preallocations(struct inode *);
-extern int __init ext4_init_mballoc(void);
-extern void ext4_exit_mballoc(void);
 extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
 				     ext4_group_t group,
 				     unsigned int nr, int *cnt);
@@ -3651,8 +3647,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
 /* readpages.c */
 extern int ext4_mpage_readpages(struct inode *inode,
 		struct readahead_control *rac, struct folio *folio);
-extern int __init ext4_init_post_read_processing(void);
-extern void ext4_exit_post_read_processing(void);
 
 /* symlink.c */
 extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
@@ -3663,14 +3657,10 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
 extern void ext4_notify_error_sysfs(struct ext4_sb_info *sbi);
 extern int ext4_register_sysfs(struct super_block *sb);
 extern void ext4_unregister_sysfs(struct super_block *sb);
-extern int __init ext4_init_sysfs(void);
-extern void ext4_exit_sysfs(void);
 
 /* block_validity */
 extern void ext4_release_system_zone(struct super_block *sb);
 extern int ext4_setup_system_zone(struct super_block *sb);
-extern int __init ext4_init_system_zone(void);
-extern void ext4_exit_system_zone(void);
 extern int ext4_inode_block_valid(struct inode *inode,
 				  ext4_fsblk_t start_blk,
 				  unsigned int count);
@@ -3750,8 +3740,6 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 len, __u64 *moved_len);
 
 /* page-io.c */
-extern int __init ext4_init_pageio(void);
-extern void ext4_exit_pageio(void);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
 extern int ext4_put_io_end(ext4_io_end_t *io_end);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 17dcf13adde275..d381ec88441ffd 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -156,19 +156,6 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 			    ext4_lblk_t len,
 			    struct pending_reservation **prealloc);
 
-int __init ext4_init_es(void)
-{
-	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
-	if (ext4_es_cachep == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-void ext4_exit_es(void)
-{
-	kmem_cache_destroy(ext4_es_cachep);
-}
-
 void ext4_es_init_tree(struct ext4_es_tree *tree)
 {
 	tree->root = RB_ROOT;
@@ -1883,18 +1870,25 @@ static void ext4_print_pending_tree(struct inode *inode)
 #define ext4_print_pending_tree(inode)
 #endif
 
-int __init ext4_init_pending(void)
+static int __init ext4_init_es(void)
 {
 	ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT);
 	if (ext4_pending_cachep == NULL)
 		return -ENOMEM;
+	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
+	if (ext4_es_cachep == NULL) {
+		kmem_cache_destroy(ext4_pending_cachep);
+		return -ENOMEM;
+	}
 	return 0;
 }
 
-void ext4_exit_pending(void)
+static void ext4_exit_es(void)
 {
+	kmem_cache_destroy(ext4_es_cachep);
 	kmem_cache_destroy(ext4_pending_cachep);
 }
+module_subinit(ext4_init_es, ext4_exit_es);
 
 void ext4_init_pending_tree(struct ext4_pending_tree *tree)
 {
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 3c8e2edee5d5d1..1cdb25c3d2dae5 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -123,8 +123,6 @@ struct ext4_pending_tree {
 	struct rb_root root;
 };
 
-extern int __init ext4_init_es(void);
-extern void ext4_exit_es(void);
 extern void ext4_es_init_tree(struct ext4_es_tree *tree);
 
 extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
@@ -244,8 +242,6 @@ extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
 
 extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v);
 
-extern int __init ext4_init_pending(void);
-extern void ext4_exit_pending(void);
 extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
 extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
 extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 3926a05eceeed1..b28930c4175cca 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -2293,7 +2293,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-int __init ext4_fc_init_dentry_cache(void)
+static int __init ext4_fc_init_dentry_cache(void)
 {
 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
 					   SLAB_RECLAIM_ACCOUNT);
@@ -2304,7 +2304,8 @@ int __init ext4_fc_init_dentry_cache(void)
 	return 0;
 }
 
-void ext4_fc_destroy_dentry_cache(void)
+static void ext4_fc_destroy_dentry_cache(void)
 {
 	kmem_cache_destroy(ext4_fc_dentry_cachep);
 }
+module_subinit(ext4_fc_init_dentry_cache, ext4_fc_destroy_dentry_cache);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9dda9cd68ab2f5..a564882432b8ff 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3936,7 +3936,7 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 	}
 }
 
-int __init ext4_init_mballoc(void)
+static int __init ext4_init_mballoc(void)
 {
 	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
 					SLAB_RECLAIM_ACCOUNT);
@@ -3963,7 +3963,7 @@ int __init ext4_init_mballoc(void)
 	return -ENOMEM;
 }
 
-void ext4_exit_mballoc(void)
+static void ext4_exit_mballoc(void)
 {
 	/*
 	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
@@ -3975,6 +3975,7 @@ void ext4_exit_mballoc(void)
 	kmem_cache_destroy(ext4_free_data_cachep);
 	ext4_groupinfo_destroy_slabs();
 }
+module_subinit(ext4_init_mballoc, ext4_exit_mballoc);
 
 #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001
 #define EXT4_MB_SYNC_UPDATE 0x0002
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ad5543866d2152..68639d5553080a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -33,7 +33,7 @@
 static struct kmem_cache *io_end_cachep;
 static struct kmem_cache *io_end_vec_cachep;
 
-int __init ext4_init_pageio(void)
+static int __init ext4_init_pageio(void)
 {
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
 	if (io_end_cachep == NULL)
@@ -47,11 +47,12 @@ int __init ext4_init_pageio(void)
 	return 0;
 }
 
-void ext4_exit_pageio(void)
+static void ext4_exit_pageio(void)
 {
 	kmem_cache_destroy(io_end_cachep);
 	kmem_cache_destroy(io_end_vec_cachep);
 }
+module_subinit(ext4_init_pageio, ext4_exit_pageio);
 
 struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
 {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 8494492582abea..5fa7329c571b42 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -390,7 +390,7 @@ int ext4_mpage_readpages(struct inode *inode,
 	return 0;
 }
 
-int __init ext4_init_post_read_processing(void)
+static int __init ext4_init_post_read_processing(void)
 {
 	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT);
 
@@ -409,8 +409,9 @@ int __init ext4_init_post_read_processing(void)
 	return -ENOMEM;
 }
 
-void ext4_exit_post_read_processing(void)
+static void ext4_exit_post_read_processing(void)
 {
 	mempool_destroy(bio_post_read_ctx_pool);
 	kmem_cache_destroy(bio_post_read_ctx_cache);
 }
+module_subinit(ext4_init_post_read_processing, ext4_exit_post_read_processing);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 207076e7e7f055..bb6a87da00ea8c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1484,29 +1484,6 @@ static void init_once(void *foo)
 	ext4_fc_init_inode(&ei->vfs_inode);
 }
 
-static int __init init_inodecache(void)
-{
-	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
-				sizeof(struct ext4_inode_info), 0,
-				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
-				offsetof(struct ext4_inode_info, i_data),
-				sizeof_field(struct ext4_inode_info, i_data),
-				init_once);
-	if (ext4_inode_cachep == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-static void destroy_inodecache(void)
-{
-	/*
-	 * Make sure all delayed rcu free inodes are flushed before we
-	 * destroy cache.
-	 */
-	rcu_barrier();
-	kmem_cache_destroy(ext4_inode_cachep);
-}
-
 void ext4_clear_inode(struct inode *inode)
 {
 	ext4_fc_del(inode);
@@ -7302,33 +7279,13 @@ static struct file_system_type ext4_fs_type = {
 };
 MODULE_ALIAS_FS("ext4");
 
-static int register_ext(void)
-{
-	register_as_ext3();
-	register_as_ext2();
-	return register_filesystem(&ext4_fs_type);
-}
-
-static void unregister_ext(void)
-{
-	unregister_as_ext2();
-	unregister_as_ext3();
-	unregister_filesystem(&ext4_fs_type);
-}
-
-static struct subexitcall_rollback rollback;
-
-static void __exit ext4_exit_fs(void)
-{
-	ext4_destroy_lazyinit_thread();
-	module_subexit(&rollback);
-}
-
 /* Shared across all ext4 file systems */
 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
 
 static int __init ext4_init_fs(void)
 {
+	int error;
+
 	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
 	ext4_li_info = NULL;
 
@@ -7338,23 +7295,40 @@ static int __init ext4_init_fs(void)
 	for (int i = 0; i < EXT4_WQ_HASH_SZ; i++)
 		init_waitqueue_head(&ext4__ioend_wq[i]);
 
-	module_subinit(ext4_init_es, ext4_exit_es, &rollback);
-	module_subinit(ext4_init_pending, ext4_exit_pending, &rollback);
-	module_subinit(ext4_init_post_read_processing, ext4_exit_post_read_processing, &rollback);
-	module_subinit(ext4_init_pageio, ext4_exit_pageio, &rollback);
-	module_subinit(ext4_init_system_zone, ext4_exit_system_zone, &rollback);
-	module_subinit(ext4_init_sysfs, ext4_exit_sysfs, &rollback);
-	module_subinit(ext4_init_mballoc, ext4_exit_mballoc, &rollback);
-	module_subinit(init_inodecache, destroy_inodecache, &rollback);
-	module_subinit(ext4_fc_init_dentry_cache, ext4_fc_destroy_dentry_cache, &rollback);
-	module_subinit(register_ext, unregister_ext, &rollback);
+	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
+				sizeof(struct ext4_inode_info), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+				offsetof(struct ext4_inode_info, i_data),
+				sizeof_field(struct ext4_inode_info, i_data),
+				init_once);
+	if (ext4_inode_cachep == NULL)
+		return -ENOMEM;
 
-	return 0;
+	register_as_ext3();
+	register_as_ext2();
+	error = register_filesystem(&ext4_fs_type);
+	if (error)
+		kmem_cache_destroy(ext4_inode_cachep);
+	return error;
+}
+
+static void __exit ext4_exit_fs(void)
+{
+	ext4_destroy_lazyinit_thread();
+	unregister_as_ext2();
+	unregister_as_ext3();
+	unregister_filesystem(&ext4_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ext4_inode_cachep);
 }
+module_subinit(ext4_init_fs, ext4_exit_fs);
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
 MODULE_DESCRIPTION("Fourth Extended Filesystem");
 MODULE_LICENSE("GPL");
 MODULE_SOFTDEP("pre: crc32c");
-module_init(ext4_init_fs)
-module_exit(ext4_exit_fs)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index ddb54608ca2ef6..df3096a9e6e39a 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -601,7 +601,7 @@ void ext4_unregister_sysfs(struct super_block *sb)
 	kobject_del(&sbi->s_kobj);
 }
 
-int __init ext4_init_sysfs(void)
+static int __init ext4_init_sysfs(void)
 {
 	int ret;
 
@@ -632,7 +632,7 @@ int __init ext4_init_sysfs(void)
 	return ret;
 }
 
-void ext4_exit_sysfs(void)
+static void ext4_exit_sysfs(void)
 {
 	kobject_put(ext4_feat);
 	ext4_feat = NULL;
@@ -641,4 +641,4 @@ void ext4_exit_sysfs(void)
 	remove_proc_entry(proc_dirname, NULL);
 	ext4_proc_root = NULL;
 }
-
+module_subinit(ext4_init_sysfs, ext4_exit_sysfs);
diff --git a/include/linux/module.h b/include/linux/module.h
index 95f7c60dede9a4..3099fb2c3d813b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -80,23 +80,13 @@ extern void cleanup_module(void);
  * module_subinit() - Called when the driver is subinitialized
  * @initfn: The subinitialization function that is called
  * @exitfn: Corresponding exit function
- * @rollback: Record information when the subinitialization failed
- *            or the driver was removed
  *
  * Use module_subinit_noexit() when there is only an subinitialization
  * function but no corresponding exit function.
  */
-#define module_subinit(initfn, exitfn, rollback)	\
-	__subinitcall(initfn, exitfn, rollback);
+#define module_subinit(initfn, exitfn)
 
-#define module_subinit_noexit(initfn, rollback)		\
-	__subinitcall_noexit(initfn, rollback);
-
-/*
- * module_subexit() - Called when the driver exits
- */
-#define module_subexit(rollback)			\
-	__subexitcall_rollback(rollback);
+#define module_subinit_noexit(initfn)
 
 #ifndef MODULE
 /**
Youling Tang July 26, 2024, 8:54 a.m. UTC | #12
On 26/07/2024 03:46, Christoph Hellwig wrote:
> On Thu, Jul 25, 2024 at 07:14:14PM +0200, Goffredo Baroncelli wrote:
>> Instead of relying to the "expected" order of the compiler/linker,
>> why doesn't manage the chain explicitly ? Something like:
> Because that doesn't actually solve anything over simple direct calls
> as you still need the symbols to be global?
>
> As an example here is a very hacky patch that just compiles with unused
> variable warnings and doesn't work at all to show how the distributed
> module_subinit would improve ext4, the file system with the least
> subinit calls of the three converted in the series, and without any
> extra cleanups like removing now unneded includes or moving more stuff
> into subinits if they can remain entirely static that way:
>
>   11 files changed, 61 insertions(+), 114 deletions(-)
>
> diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
> index 87ee3a17bd29c9..87f0ccd06fc069 100644
> --- a/fs/ext4/block_validity.c
> +++ b/fs/ext4/block_validity.c
> @@ -29,7 +29,7 @@ struct ext4_system_zone {
>   
>   static struct kmem_cache *ext4_system_zone_cachep;
>   
> -int __init ext4_init_system_zone(void)
> +static int __init ext4_init_system_zone(void)
>   {
>   	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
>   	if (ext4_system_zone_cachep == NULL)
> @@ -37,11 +37,12 @@ int __init ext4_init_system_zone(void)
>   	return 0;
>   }
>   
> -void ext4_exit_system_zone(void)
> +static void ext4_exit_system_zone(void)
>   {
>   	rcu_barrier();
>   	kmem_cache_destroy(ext4_system_zone_cachep);
>   }
> +module_subinit(ext4_init_system_zone, ext4_exit_system_zone);
>   
>   static inline int can_merge(struct ext4_system_zone *entry1,
>   		     struct ext4_system_zone *entry2)
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 08acd152261ed8..db81f18cdc3266 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2915,8 +2915,6 @@ void ext4_fc_del(struct inode *inode);
>   bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
>   void ext4_fc_replay_cleanup(struct super_block *sb);
>   int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
> -int __init ext4_fc_init_dentry_cache(void);
> -void ext4_fc_destroy_dentry_cache(void);
>   int ext4_fc_record_regions(struct super_block *sb, int ino,
>   			   ext4_lblk_t lblk, ext4_fsblk_t pblk,
>   			   int len, int replay);
> @@ -2930,8 +2928,6 @@ extern void ext4_mb_release(struct super_block *);
>   extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
>   				struct ext4_allocation_request *, int *);
>   extern void ext4_discard_preallocations(struct inode *);
> -extern int __init ext4_init_mballoc(void);
> -extern void ext4_exit_mballoc(void);
>   extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
>   				     ext4_group_t group,
>   				     unsigned int nr, int *cnt);
> @@ -3651,8 +3647,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
>   /* readpages.c */
>   extern int ext4_mpage_readpages(struct inode *inode,
>   		struct readahead_control *rac, struct folio *folio);
> -extern int __init ext4_init_post_read_processing(void);
> -extern void ext4_exit_post_read_processing(void);
>   
>   /* symlink.c */
>   extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
> @@ -3663,14 +3657,10 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
>   extern void ext4_notify_error_sysfs(struct ext4_sb_info *sbi);
>   extern int ext4_register_sysfs(struct super_block *sb);
>   extern void ext4_unregister_sysfs(struct super_block *sb);
> -extern int __init ext4_init_sysfs(void);
> -extern void ext4_exit_sysfs(void);
>   
>   /* block_validity */
>   extern void ext4_release_system_zone(struct super_block *sb);
>   extern int ext4_setup_system_zone(struct super_block *sb);
> -extern int __init ext4_init_system_zone(void);
> -extern void ext4_exit_system_zone(void);
>   extern int ext4_inode_block_valid(struct inode *inode,
>   				  ext4_fsblk_t start_blk,
>   				  unsigned int count);
> @@ -3750,8 +3740,6 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
>   			     __u64 len, __u64 *moved_len);
>   
>   /* page-io.c */
> -extern int __init ext4_init_pageio(void);
> -extern void ext4_exit_pageio(void);
>   extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
>   extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
>   extern int ext4_put_io_end(ext4_io_end_t *io_end);
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index 17dcf13adde275..d381ec88441ffd 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -156,19 +156,6 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
>   			    ext4_lblk_t len,
>   			    struct pending_reservation **prealloc);
>   
> -int __init ext4_init_es(void)
> -{
> -	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
> -	if (ext4_es_cachep == NULL)
> -		return -ENOMEM;
> -	return 0;
> -}
> -
> -void ext4_exit_es(void)
> -{
> -	kmem_cache_destroy(ext4_es_cachep);
> -}
> -
>   void ext4_es_init_tree(struct ext4_es_tree *tree)
>   {
>   	tree->root = RB_ROOT;
> @@ -1883,18 +1870,25 @@ static void ext4_print_pending_tree(struct inode *inode)
>   #define ext4_print_pending_tree(inode)
>   #endif
>   
> -int __init ext4_init_pending(void)
> +static int __init ext4_init_es(void)
>   {
>   	ext4_pending_cachep = KMEM_CACHE(pending_reservation, SLAB_RECLAIM_ACCOUNT);
>   	if (ext4_pending_cachep == NULL)
>   		return -ENOMEM;
> +	ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
> +	if (ext4_es_cachep == NULL) {
> +		kmem_cache_destroy(ext4_pending_cachep);
> +		return -ENOMEM;
> +	}
>   	return 0;
>   }
>   
> -void ext4_exit_pending(void)
> +static void ext4_exit_es(void)
>   {
> +	kmem_cache_destroy(ext4_es_cachep);
>   	kmem_cache_destroy(ext4_pending_cachep);
>   }
> +module_subinit(ext4_init_es, ext4_exit_es);
>   
>   void ext4_init_pending_tree(struct ext4_pending_tree *tree)
>   {
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index 3c8e2edee5d5d1..1cdb25c3d2dae5 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -123,8 +123,6 @@ struct ext4_pending_tree {
>   	struct rb_root root;
>   };
>   
> -extern int __init ext4_init_es(void);
> -extern void ext4_exit_es(void);
>   extern void ext4_es_init_tree(struct ext4_es_tree *tree);
>   
>   extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
> @@ -244,8 +242,6 @@ extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
>   
>   extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v);
>   
> -extern int __init ext4_init_pending(void);
> -extern void ext4_exit_pending(void);
>   extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
>   extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
>   extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index 3926a05eceeed1..b28930c4175cca 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -2293,7 +2293,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v)
>   	return 0;
>   }
>   
> -int __init ext4_fc_init_dentry_cache(void)
> +static int __init ext4_fc_init_dentry_cache(void)
>   {
>   	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
>   					   SLAB_RECLAIM_ACCOUNT);
> @@ -2304,7 +2304,8 @@ int __init ext4_fc_init_dentry_cache(void)
>   	return 0;
>   }
>   
> -void ext4_fc_destroy_dentry_cache(void)
> +static void ext4_fc_destroy_dentry_cache(void)
>   {
>   	kmem_cache_destroy(ext4_fc_dentry_cachep);
>   }
> +module_subinit(ext4_fc_init_dentry_cache, ext4_fc_destroy_dentry_cache);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 9dda9cd68ab2f5..a564882432b8ff 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3936,7 +3936,7 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
>   	}
>   }
>   
> -int __init ext4_init_mballoc(void)
> +static int __init ext4_init_mballoc(void)
>   {
>   	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
>   					SLAB_RECLAIM_ACCOUNT);
> @@ -3963,7 +3963,7 @@ int __init ext4_init_mballoc(void)
>   	return -ENOMEM;
>   }
>   
> -void ext4_exit_mballoc(void)
> +static void ext4_exit_mballoc(void)
>   {
>   	/*
>   	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
> @@ -3975,6 +3975,7 @@ void ext4_exit_mballoc(void)
>   	kmem_cache_destroy(ext4_free_data_cachep);
>   	ext4_groupinfo_destroy_slabs();
>   }
> +module_subinit(ext4_init_mballoc, ext4_exit_mballoc);
>   
>   #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001
>   #define EXT4_MB_SYNC_UPDATE 0x0002
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index ad5543866d2152..68639d5553080a 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -33,7 +33,7 @@
>   static struct kmem_cache *io_end_cachep;
>   static struct kmem_cache *io_end_vec_cachep;
>   
> -int __init ext4_init_pageio(void)
> +static int __init ext4_init_pageio(void)
>   {
>   	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
>   	if (io_end_cachep == NULL)
> @@ -47,11 +47,12 @@ int __init ext4_init_pageio(void)
>   	return 0;
>   }
>   
> -void ext4_exit_pageio(void)
> +static void ext4_exit_pageio(void)
>   {
>   	kmem_cache_destroy(io_end_cachep);
>   	kmem_cache_destroy(io_end_vec_cachep);
>   }
> +module_subinit(ext4_init_pageio, ext4_exit_pageio);
>   
>   struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
>   {
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 8494492582abea..5fa7329c571b42 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -390,7 +390,7 @@ int ext4_mpage_readpages(struct inode *inode,
>   	return 0;
>   }
>   
> -int __init ext4_init_post_read_processing(void)
> +static int __init ext4_init_post_read_processing(void)
>   {
>   	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT);
>   
> @@ -409,8 +409,9 @@ int __init ext4_init_post_read_processing(void)
>   	return -ENOMEM;
>   }
>   
> -void ext4_exit_post_read_processing(void)
> +static void ext4_exit_post_read_processing(void)
>   {
>   	mempool_destroy(bio_post_read_ctx_pool);
>   	kmem_cache_destroy(bio_post_read_ctx_cache);
>   }
> +module_subinit(ext4_init_post_read_processing, ext4_exit_post_read_processing);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 207076e7e7f055..bb6a87da00ea8c 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1484,29 +1484,6 @@ static void init_once(void *foo)
>   	ext4_fc_init_inode(&ei->vfs_inode);
>   }
>   
> -static int __init init_inodecache(void)
> -{
> -	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
> -				sizeof(struct ext4_inode_info), 0,
> -				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
> -				offsetof(struct ext4_inode_info, i_data),
> -				sizeof_field(struct ext4_inode_info, i_data),
> -				init_once);
> -	if (ext4_inode_cachep == NULL)
> -		return -ENOMEM;
> -	return 0;
> -}
> -
> -static void destroy_inodecache(void)
> -{
> -	/*
> -	 * Make sure all delayed rcu free inodes are flushed before we
> -	 * destroy cache.
> -	 */
> -	rcu_barrier();
> -	kmem_cache_destroy(ext4_inode_cachep);
> -}
> -
>   void ext4_clear_inode(struct inode *inode)
>   {
>   	ext4_fc_del(inode);
> @@ -7302,33 +7279,13 @@ static struct file_system_type ext4_fs_type = {
>   };
>   MODULE_ALIAS_FS("ext4");
>   
> -static int register_ext(void)
> -{
> -	register_as_ext3();
> -	register_as_ext2();
> -	return register_filesystem(&ext4_fs_type);
> -}
> -
> -static void unregister_ext(void)
> -{
> -	unregister_as_ext2();
> -	unregister_as_ext3();
> -	unregister_filesystem(&ext4_fs_type);
> -}
> -
> -static struct subexitcall_rollback rollback;
> -
> -static void __exit ext4_exit_fs(void)
> -{
> -	ext4_destroy_lazyinit_thread();
> -	module_subexit(&rollback);
> -}
> -
>   /* Shared across all ext4 file systems */
>   wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
>   
>   static int __init ext4_init_fs(void)
>   {
> +	int error;
> +
>   	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
>   	ext4_li_info = NULL;
>   
> @@ -7338,23 +7295,40 @@ static int __init ext4_init_fs(void)
>   	for (int i = 0; i < EXT4_WQ_HASH_SZ; i++)
>   		init_waitqueue_head(&ext4__ioend_wq[i]);
>   
> -	module_subinit(ext4_init_es, ext4_exit_es, &rollback);
> -	module_subinit(ext4_init_pending, ext4_exit_pending, &rollback);
> -	module_subinit(ext4_init_post_read_processing, ext4_exit_post_read_processing, &rollback);
> -	module_subinit(ext4_init_pageio, ext4_exit_pageio, &rollback);
> -	module_subinit(ext4_init_system_zone, ext4_exit_system_zone, &rollback);
> -	module_subinit(ext4_init_sysfs, ext4_exit_sysfs, &rollback);
> -	module_subinit(ext4_init_mballoc, ext4_exit_mballoc, &rollback);
> -	module_subinit(init_inodecache, destroy_inodecache, &rollback);
> -	module_subinit(ext4_fc_init_dentry_cache, ext4_fc_destroy_dentry_cache, &rollback);
> -	module_subinit(register_ext, unregister_ext, &rollback);
> +	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
> +				sizeof(struct ext4_inode_info), 0,
> +				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
> +				offsetof(struct ext4_inode_info, i_data),
> +				sizeof_field(struct ext4_inode_info, i_data),
> +				init_once);
> +	if (ext4_inode_cachep == NULL)
> +		return -ENOMEM;
>   
> -	return 0;
> +	register_as_ext3();
> +	register_as_ext2();
> +	error = register_filesystem(&ext4_fs_type);
> +	if (error)
> +		kmem_cache_destroy(ext4_inode_cachep);
> +	return error;
> +}
> +
> +static void __exit ext4_exit_fs(void)
> +{
> +	ext4_destroy_lazyinit_thread();
> +	unregister_as_ext2();
> +	unregister_as_ext3();
> +	unregister_filesystem(&ext4_fs_type);
> +
> +	/*
> +	 * Make sure all delayed rcu free inodes are flushed before we
> +	 * destroy cache.
> +	 */
> +	rcu_barrier();
> +	kmem_cache_destroy(ext4_inode_cachep);
>   }
> +module_subinit(ext4_init_fs, ext4_exit_fs);
>   
>   MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
>   MODULE_DESCRIPTION("Fourth Extended Filesystem");
>   MODULE_LICENSE("GPL");
>   MODULE_SOFTDEP("pre: crc32c");
> -module_init(ext4_init_fs)
> -module_exit(ext4_exit_fs)
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index ddb54608ca2ef6..df3096a9e6e39a 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -601,7 +601,7 @@ void ext4_unregister_sysfs(struct super_block *sb)
>   	kobject_del(&sbi->s_kobj);
>   }
>   
> -int __init ext4_init_sysfs(void)
> +static int __init ext4_init_sysfs(void)
>   {
>   	int ret;
>   
> @@ -632,7 +632,7 @@ int __init ext4_init_sysfs(void)
>   	return ret;
>   }
>   
> -void ext4_exit_sysfs(void)
> +static void ext4_exit_sysfs(void)
>   {
>   	kobject_put(ext4_feat);
>   	ext4_feat = NULL;
> @@ -641,4 +641,4 @@ void ext4_exit_sysfs(void)
>   	remove_proc_entry(proc_dirname, NULL);
>   	ext4_proc_root = NULL;
>   }
> -
> +module_subinit(ext4_init_sysfs, ext4_exit_sysfs);
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 95f7c60dede9a4..3099fb2c3d813b 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -80,23 +80,13 @@ extern void cleanup_module(void);
>    * module_subinit() - Called when the driver is subinitialized
>    * @initfn: The subinitialization function that is called
>    * @exitfn: Corresponding exit function
> - * @rollback: Record information when the subinitialization failed
> - *            or the driver was removed
>    *
>    * Use module_subinit_noexit() when there is only an subinitialization
>    * function but no corresponding exit function.
>    */
> -#define module_subinit(initfn, exitfn, rollback)	\
> -	__subinitcall(initfn, exitfn, rollback);
> +#define module_subinit(initfn, exitfn)
>   
> -#define module_subinit_noexit(initfn, rollback)		\
> -	__subinitcall_noexit(initfn, rollback);
> -
> -/*
> - * module_subexit() - Called when the driver exits
> - */
> -#define module_subexit(rollback)			\
> -	__subexitcall_rollback(rollback);
> +#define module_subinit_noexit(initfn)
>   
>   #ifndef MODULE
>   /**
Based on this patch, we may need to do these things with this
implementation,

1. Change the order of *.o in the Makefile (the same order as before the 
change)
```
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,12 +5,16 @@

  obj-$(CONFIG_EXT4_FS) += ext4.o

-ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
-               extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
-               indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
-               mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
-               super.o symlink.o sysfs.o xattr.o xattr_hurd.o 
xattr_trusted.o \
-               xattr_user.o fast_commit.o orphan.o
+ext4-y := balloc.o bitmap.o dir.o ext4_jbd2.o extents.o \
+               file.o fsmap.o fsync.o hash.o ialloc.o \
+               indirect.o inline.o inode.o ioctl.o migrate.o \
+               mmp.o move_extent.o namei.o resize.o \
+               symlink.o xattr.o xattr_hurd.o xattr_trusted.o \
+               xattr_user.o orphan.o
+
+# Ensure the linking order for module_subinit
+ext4-y += extents_status.o readpage.o page-io.o block_validity.o sysfs.o \
+               mballoc.o fast_commit.o super.o
```

2. We need to define module_subinit through the ifdef MODULE
distinction,

- build-in mode:
Need to be defined as:
define module_subinit(initfn, 
exitfn)                                         \
        static subinitcall_t __subinitcall_##initfn __subinit_call = 
initfn;   \
        static subexitcall_t __subexitcall_##exitfn __subexit_call = exitfn;

Cannot be defined as:
define module_subinit(initfn, 
exitfn)                                         \
        static subinitcall_t __subinitcall_##initfn __subinit_call = 
initfn;   \
        static subexitcall_t __subexitcall_##exitfn __subexit_call = 
exitfn;   \
        initfn();

module_subinit defines only __subinitcall_{initfn, exitfn} symbols
to store initfn/exitfn addresses.initfn cannot be run directly
(functions cannot be run directly in non-code blocks), the initfn
of all build-in modules needs to be executed together somewhere.

When one of the subinit runs in a module fails, it is difficult
to rollback execution of subexit.

module_init does not have to do these things (each module has only
one module_init), module_init executes fn through the following path,
and even if fn fails, it only needs to return ret.
do_initcalls
   do_initcall_level
     do_one_initcall
       fn

- .ko mode:
Each module has multiple subinit and initfn cannot be run like
module_init. That is, we cannot simply add a subinit member to a
struct module and execute it via do_one_initcall(mod->subinit).

3. Another way to record the location of the failure is still needed
to assist in rollback exitfn.

4. The order in which subinit is called is not intuitively known
(although it can be found in the Makefile).

Thanks,
Youling.
Christoph Hellwig July 26, 2024, 2:04 p.m. UTC | #13
On Fri, Jul 26, 2024 at 04:54:59PM +0800, Youling Tang wrote:
> Based on this patch, we may need to do these things with this
>
>
> 1. Change the order of *.o in the Makefile (the same order as before the
> change)

While we'll need to be careful, we don't need to match the exact
order.  Most of the calls simply create slab caches / mempools and
similar things and the order for those does not matter at all.

Of course the register_filesytem calls need to be last, and sysfs
registration probably should be second to last, but for the vast
amount of calls the order does not matter as long as it is unwound
in reverse order.

> 2. We need to define module_subinit through the ifdef MODULE
> distinction,

Yes.

> When one of the subinit runs in a module fails, it is difficult
> to rollback execution of subexit.

By having both section in the same order, you an just walk the
exit section backwards from the offset that failed.  Of course that
only matters for the modular case as normal initcalls don't get
unwound when built-in either.

> 4. The order in which subinit is called is not intuitively known
> (although it can be found in the Makefile).

Link order through make file is already a well known concept due to
it mattering for built-in code.
David Sterba July 26, 2024, 3:22 p.m. UTC | #14
On Fri, Jul 26, 2024 at 07:04:35AM -0700, Christoph Hellwig wrote:
> On Fri, Jul 26, 2024 at 04:54:59PM +0800, Youling Tang wrote:
> > Based on this patch, we may need to do these things with this
> >
> >
> > 1. Change the order of *.o in the Makefile (the same order as before the
> > change)
> 
> While we'll need to be careful, we don't need to match the exact
> order.  Most of the calls simply create slab caches / mempools and
> similar things and the order for those does not matter at all.
> 
> Of course the register_filesytem calls need to be last, and sysfs
> registration probably should be second to last, but for the vast
> amount of calls the order does not matter as long as it is unwound
> in reverse order.
> 
> > 2. We need to define module_subinit through the ifdef MODULE
> > distinction,
> 
> Yes.
> 
> > When one of the subinit runs in a module fails, it is difficult
> > to rollback execution of subexit.
> 
> By having both section in the same order, you an just walk the
> exit section backwards from the offset that failed.  Of course that
> only matters for the modular case as normal initcalls don't get
> unwound when built-in either.
> 
> > 4. The order in which subinit is called is not intuitively known
> > (although it can be found in the Makefile).
> 
> Link order through make file is already a well known concept due to
> it mattering for built-in code.

All of this sounds overengineered for something that is a simple array
and two helpers. The code is not finalized so I'll wait for the next
version but specific file order in makefile and linker tricks seems
fragile and I'm not sure I want this for btrfs.
Theodore Ts'o July 26, 2024, 5:58 p.m. UTC | #15
On Fri, Jul 26, 2024 at 05:22:37PM +0200, David Sterba wrote:
> All of this sounds overengineered for something that is a simple array
> and two helpers. The code is not finalized so I'll wait for the next
> version but specific file order in makefile and linker tricks seems
> fragile and I'm not sure I want this for btrfs.

Yeah, that's my reaction as well.  This only saves 50 lines of code in
ext4, and that includes unrelated changes such as getting rid of "int
i" and putting the declaration into the for loop --- "for (int i =
...").  Sure, that saves two lines of code, but yay?

If the ordering how the functions gets called is based on the magic
ordering in the Makefile, I'm not sure this actually makes the code
clearer, more robust, and easier to maintain for the long term.

	      	      	  	    	     	 - Ted
Christoph Hellwig July 26, 2024, 6:09 p.m. UTC | #16
On Fri, Jul 26, 2024 at 01:58:00PM -0400, Theodore Ts'o wrote:
> Yeah, that's my reaction as well.  This only saves 50 lines of code in
> ext4, and that includes unrelated changes such as getting rid of "int
> i" and putting the declaration into the for loop --- "for (int i =
> ...").  Sure, that saves two lines of code, but yay?
> 
> If the ordering how the functions gets called is based on the magic
> ordering in the Makefile, I'm not sure this actually makes the code
> clearer, more robust, and easier to maintain for the long term.

So you two object to kernel initcalls for the same reason and would
rather go back to calling everything explicitly?
David Sterba July 26, 2024, 10:45 p.m. UTC | #17
On Fri, Jul 26, 2024 at 11:09:02AM -0700, Christoph Hellwig wrote:
> On Fri, Jul 26, 2024 at 01:58:00PM -0400, Theodore Ts'o wrote:
> > Yeah, that's my reaction as well.  This only saves 50 lines of code in
> > ext4, and that includes unrelated changes such as getting rid of "int
> > i" and putting the declaration into the for loop --- "for (int i =
> > ...").  Sure, that saves two lines of code, but yay?
> > 
> > If the ordering how the functions gets called is based on the magic
> > ordering in the Makefile, I'm not sure this actually makes the code
> > clearer, more robust, and easier to maintain for the long term.
> 
> So you two object to kernel initcalls for the same reason and would
> rather go back to calling everything explicitly?

No and not my call to do it for the kernel. Somebody probably had a
reason use the initcalls, there are probably practical reasons for that.
Quick grep shows there are thousands of initcalls scattered over the
whole code base, that does ask for some tricks because updating a single
file with explicit calls would be a nightmare. Unlike for a subsystem
inside one directory, like a filesystem.
Theodore Ts'o July 27, 2024, 2:52 p.m. UTC | #18
On Fri, Jul 26, 2024 at 11:09:02AM -0700, Christoph Hellwig wrote:
> On Fri, Jul 26, 2024 at 01:58:00PM -0400, Theodore Ts'o wrote:
> > Yeah, that's my reaction as well.  This only saves 50 lines of code in
> > ext4, and that includes unrelated changes such as getting rid of "int
> > i" and putting the declaration into the for loop --- "for (int i =
> > ...").  Sure, that saves two lines of code, but yay?
> > 
> > If the ordering how the functions gets called is based on the magic
> > ordering in the Makefile, I'm not sure this actually makes the code
> > clearer, more robust, and easier to maintain for the long term.
> 
> So you two object to kernel initcalls for the same reason and would
> rather go back to calling everything explicitly?

I don't oject to kernel initcalls which don't have any
interdependencies and where ordering doesn't matter.

						- Ted
Youling Tang July 29, 2024, 1:46 a.m. UTC | #19
On 27/07/2024 22:52, Theodore Ts'o wrote:
> On Fri, Jul 26, 2024 at 11:09:02AM -0700, Christoph Hellwig wrote:
>> On Fri, Jul 26, 2024 at 01:58:00PM -0400, Theodore Ts'o wrote:
>>> Yeah, that's my reaction as well.  This only saves 50 lines of code in
>>> ext4, and that includes unrelated changes such as getting rid of "int
>>> i" and putting the declaration into the for loop --- "for (int i =
>>> ...").  Sure, that saves two lines of code, but yay?
>>>
>>> If the ordering how the functions gets called is based on the magic
>>> ordering in the Makefile, I'm not sure this actually makes the code
>>> clearer, more robust, and easier to maintain for the long term.
>> So you two object to kernel initcalls for the same reason and would
>> rather go back to calling everything explicitly?
> I don't oject to kernel initcalls which don't have any
> interdependencies and where ordering doesn't matter.
1. Previous version implementation: array mode (see link 1) :
    Advantages:
    - Few changes, simple principle, easy to understand code.
    Disadvantages:
    - Each modified module needs to maintain an array, more code.

2. Current implementation: explicit call subinit in initcall (see link 2) :
    Advantages:
    - Direct use of helpes macros, the subinit call sequence is
      intuitive, and the implementation is relatively simple.
    Disadvantages:
    - helper macros need to be implemented compared to array mode.

3. Only one module_subinit per file (not implemented, see link 3) :
    Advantage:
    - No need to display to call subinit.
    Disadvantages:
    - Magic order based on Makefile makes code more fragile,
    - Make sure that each file has only one module_subinit,
    - It is not intuitive to know which subinits the module needs
      and in what order (grep and Makefile are required),
    - With multiple subinits per module, it would be difficult to
      define module_{subinit, subexit} by MODULE, and difficult to
      rollback when initialization fails (I haven't found a good way
      to do this yet).


Personally, I prefer the implementation of method two.

Links:
[1]: 
https://lore.kernel.org/all/20240711074859.366088-4-youling.tang@linux.dev/
[2]: 
https://lore.kernel.org/all/20240723083239.41533-2-youling.tang@linux.dev/
[3]: https://lore.kernel.org/all/ZqKreStOD-eRkKZU@infradead.org/

Thanks,
Youling.
Theodore Ts'o July 29, 2024, 2:44 a.m. UTC | #20
On Mon, Jul 29, 2024 at 09:46:17AM +0800, Youling Tang wrote:
> 1. Previous version implementation: array mode (see link 1) :
>    Advantages:
>    - Few changes, simple principle, easy to understand code.
>    Disadvantages:
>    - Each modified module needs to maintain an array, more code.
> 
> 2. Current implementation: explicit call subinit in initcall (see link 2) :
>    Advantages:
>    - Direct use of helpes macros, the subinit call sequence is
>      intuitive, and the implementation is relatively simple.
>    Disadvantages:
>    - helper macros need to be implemented compared to array mode.
> 
> 3. Only one module_subinit per file (not implemented, see link 3) :
>    Advantage:
>    - No need to display to call subinit.
>    Disadvantages:
>    - Magic order based on Makefile makes code more fragile,
>    - Make sure that each file has only one module_subinit,
>    - It is not intuitive to know which subinits the module needs
>      and in what order (grep and Makefile are required),
>    - With multiple subinits per module, it would be difficult to
>      define module_{subinit, subexit} by MODULE, and difficult to
>      rollback when initialization fails (I haven't found a good way
>      to do this yet).
> 
>
> Personally, I prefer the implementation of method two.

But there's also method zero --- keep things the way they are, and
don't try to add a new astraction.

Advantage:

 -- Code has worked for decades, so it is very well tested
 -- Very easy to understand and maintain

Disadvantage

 --- A few extra lines of C code.

which we need to weigh against the other choices.

      	      	       	       	   - Ted
Youling Tang July 29, 2024, 3:01 a.m. UTC | #21
On 29/07/2024 10:44, Theodore Ts'o wrote:
> On Mon, Jul 29, 2024 at 09:46:17AM +0800, Youling Tang wrote:
>> 1. Previous version implementation: array mode (see link 1) :
>>     Advantages:
>>     - Few changes, simple principle, easy to understand code.
>>     Disadvantages:
>>     - Each modified module needs to maintain an array, more code.
>>
>> 2. Current implementation: explicit call subinit in initcall (see link 2) :
>>     Advantages:
>>     - Direct use of helpes macros, the subinit call sequence is
>>       intuitive, and the implementation is relatively simple.
>>     Disadvantages:
>>     - helper macros need to be implemented compared to array mode.
>>
>> 3. Only one module_subinit per file (not implemented, see link 3) :
>>     Advantage:
>>     - No need to display to call subinit.
>>     Disadvantages:
>>     - Magic order based on Makefile makes code more fragile,
>>     - Make sure that each file has only one module_subinit,
>>     - It is not intuitive to know which subinits the module needs
>>       and in what order (grep and Makefile are required),
>>     - With multiple subinits per module, it would be difficult to
>>       define module_{subinit, subexit} by MODULE, and difficult to
>>       rollback when initialization fails (I haven't found a good way
>>       to do this yet).
>>
>>
>> Personally, I prefer the implementation of method two.
> But there's also method zero --- keep things the way they are, and
> don't try to add a new astraction.
>
> Advantage:
>
>   -- Code has worked for decades, so it is very well tested
>   -- Very easy to understand and maintain
>
> Disadvantage
>
>   --- A few extra lines of C code.
The number of lines of code is not important, the main point is to
better ensure that subexit runs in the reverse order of subinit when
init fails.

Thanks,
Youling.

>
> which we need to weigh against the other choices.
>
>        	      	       	       	   - Ted
Christoph Hellwig July 29, 2024, 6:57 p.m. UTC | #22
On Sun, Jul 28, 2024 at 10:44:12PM -0400, Theodore Ts'o wrote:
> >
> > Personally, I prefer the implementation of method two.
> 
> But there's also method zero --- keep things the way they are, and
> don't try to add a new astraction.
> 
> Advantage:
> 
>  -- Code has worked for decades, so it is very well tested
>  -- Very easy to understand and maintain
> 
> Disadvantage
> 
>  --- A few extra lines of C code.
> 
> which we need to weigh against the other choices.

I think option zero is the right option for you and David and anyone
scared of link order issues.

But I know for XFS or the nvme code having multiple initcalls per
module would be extremely helpfu.  I don't really want to drag Youling
into implementing something he is not behind, but I plan to try that
out myself once I find a little time.
diff mbox series

Patch

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 677315e51e54..48ccac7c6448 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -927,6 +927,10 @@ 
 		INIT_CALLS_LEVEL(7)					\
 		__initcall_end = .;
 
+#define SUBINIT_CALL							\
+		*(.subinitcall.init)					\
+		*(.subexitcall.exit)
+
 #define CON_INITCALL							\
 	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
 
@@ -1155,6 +1159,7 @@ 
 		INIT_DATA						\
 		INIT_SETUP(initsetup_align)				\
 		INIT_CALLS						\
+		SUBINIT_CALL						\
 		CON_INITCALL						\
 		INIT_RAM_FS						\
 	}
diff --git a/include/linux/init.h b/include/linux/init.h
index ee1309473bc6..e8689ff2cb6c 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -55,6 +55,9 @@ 
 #define __exitdata	__section(".exit.data")
 #define __exit_call	__used __section(".exitcall.exit")
 
+#define __subinit_call	__used __section(".subinitcall.init")
+#define __subexit_call	__used __section(".subexitcall.exit")
+
 /*
  * modpost check for section mismatches during the kernel build.
  * A section mismatch happens when there are references from a
@@ -115,6 +118,9 @@ 
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
+typedef int (*subinitcall_t)(void);
+typedef void (*subexitcall_t)(void);
+
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
 typedef int initcall_entry_t;
 
@@ -183,7 +189,61 @@  extern struct module __this_module;
 #endif
 
 #endif
-  
+
+#ifndef __ASSEMBLY__
+struct subexitcall_rollback {
+	/*
+	 * Records the address of the first sub-initialization function in the
+	 * ".subexitcall.exit" section
+	 */
+	unsigned long first_addr;
+	int ncalls;
+};
+
+static inline void __subexitcall_rollback(struct subexitcall_rollback *r)
+{
+	unsigned long addr = r->first_addr - sizeof(r->first_addr) * (r->ncalls - 1);
+
+	for (; r->ncalls--; addr += sizeof(r->first_addr)) {
+		unsigned long *tmp = (void *)addr;
+		subexitcall_t fn = (subexitcall_t)*tmp;
+		fn();
+	}
+}
+
+static inline void set_rollback_ncalls(struct subexitcall_rollback *r)
+{
+	r->ncalls++;
+}
+
+static inline void set_rollback_first_addr(struct subexitcall_rollback *rollback,
+					   unsigned long addr)
+{
+	if (!rollback->first_addr)
+		rollback->first_addr = addr;
+}
+
+#define __subinitcall_noexit(initfn, rollback)					\
+do {										\
+	static subinitcall_t __subinitcall_##initfn __subinit_call = initfn;	\
+	int _ret;								\
+	_ret = initfn();							\
+	if (_ret < 0) {								\
+		__subexitcall_rollback(rollback);				\
+		return _ret;							\
+	}									\
+} while (0)
+
+#define __subinitcall(initfn, exitfn, rollback)						\
+do {											\
+	static subexitcall_t __subexitcall_##exitfn __subexit_call = exitfn;		\
+	set_rollback_first_addr(rollback, (unsigned long)&__subexitcall_##exitfn);	\
+	__subinitcall_noexit(initfn, rollback);						\
+	set_rollback_ncalls(rollback);							\
+} while (0)
+
+#endif /* !__ASSEMBLY__ */
+
 #ifndef MODULE
 
 #ifndef __ASSEMBLY__
diff --git a/include/linux/module.h b/include/linux/module.h
index 4213d8993cd8..95f7c60dede9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -76,6 +76,28 @@  extern struct module_attribute module_uevent;
 extern int init_module(void);
 extern void cleanup_module(void);
 
+/*
+ * module_subinit() - Called when the driver is subinitialized
+ * @initfn: The subinitialization function that is called
+ * @exitfn: Corresponding exit function
+ * @rollback: Record information when the subinitialization failed
+ *            or the driver was removed
+ *
+ * Use module_subinit_noexit() when there is only an subinitialization
+ * function but no corresponding exit function.
+ */
+#define module_subinit(initfn, exitfn, rollback)	\
+	__subinitcall(initfn, exitfn, rollback);
+
+#define module_subinit_noexit(initfn, rollback)		\
+	__subinitcall_noexit(initfn, rollback);
+
+/*
+ * module_subexit() - Called when the driver exits
+ */
+#define module_subexit(rollback)			\
+	__subexitcall_rollback(rollback);
+
 #ifndef MODULE
 /**
  * module_init() - driver initialization entry point