diff mbox series

[5/6] fixdep: avoid parsing the same file over again

Message ID 20221231064203.1623793-6-masahiroy@kernel.org (mailing list archive)
State New, archived
Headers show
Series kbuild: fix dep-file processing for rust | expand

Commit Message

Masahiro Yamada Dec. 31, 2022, 6:42 a.m. UTC
The dep files (*.d files) emitted by C compilers usually contain the
deduplicated list of included files.

There is an exceptional case: if a header is included both by the
-include command line option and by an #include directive, it appears
twice in the *.d file.

For example, the top Makefile specifies the command line option,
-include $(srctree)/include/linux/kconfig.h, so you do not need to
add #include <linux/kconfig.h> in every source file.

In fact, include/linux/kconfig.h is listed twice in many .*.cmd files
due to include/linux/xarray.h including <linux/kconfig.h>.
I did not fix that since it is a small redundancy.

However, this is more annoying for rustc, which emits the dependency
list once for each emission type.

For example, cmd_rustc_library emits dep-info, obj, and metadata.
So, the emitted *.d file contains the dependency for those 3 targets,
which makes fixdep parse the same file 3 times.

  $ grep rust/alloc/raw_vec.rs rust/.alloc.o.cmd
    rust/alloc/raw_vec.rs \
    rust/alloc/raw_vec.rs \
    rust/alloc/raw_vec.rs \

To skip the repeated parsing, this commit adds a hash table for parsed
files, just like we did for CONFIG options.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---

 scripts/basic/fixdep.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

Comments

Miguel Ojeda Jan. 3, 2023, 8:46 p.m. UTC | #1
On Sat, Dec 31, 2022 at 7:42 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> To skip the second parsing, this commit adds a hash table for parsed
> files, just like we did for CONFIG options.

Acked-by: Miguel Ojeda <ojeda@kernel.org>
Tested-by: Miguel Ojeda <ojeda@kernel.org>

Cheers,
Miguel
Vincenzo Palazzo Jan. 6, 2023, 9:31 a.m. UTC | #2
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>

On Sat Dec 31, 2022 at 7:42 AM CET, Masahiro Yamada wrote:
> The dep files (*.d files) emitted by C compilers usually contain the
> deduplicated list of included files.
>
> There is an exceptional case; if a header is included by the -include
> command line option, and also by #include directive, it appears twice
> in the *.d file.
>
> For example, the top Makefile specifies the command line option,
> -include $(srctree)/include/linux/kconfig.h. You do not need to
> add #include <linux/kconfig.h> in every source file.
>
> In fact, include/linux/kconfig.h is listed twice in many .*.cmd files
> due to include/linux/xarray.h including <linux/kconfig.h>.
> I did not fix that since it is a small redundancy.
>
> However, this is more annoying for rustc. rustc emits the dependency
> for each emission type.
>
> For example, cmd_rustc_library emits dep-info, obj, and metadata.
> So, the emitted *.d file contains the dependency for those 3 targets,
> which makes fixdep parse the same file 3 times.
>
>   $ grep rust/alloc/raw_vec.rs rust/.alloc.o.cmd
>     rust/alloc/raw_vec.rs \
>     rust/alloc/raw_vec.rs \
>     rust/alloc/raw_vec.rs \
>
> To skip the second parsing, this commit adds a hash table for parsed
> files, just like we did for CONFIG options.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---
>
>  scripts/basic/fixdep.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
> index b20777b888d7..cc8f6d34c2ca 100644
> --- a/scripts/basic/fixdep.c
> +++ b/scripts/basic/fixdep.c
> @@ -113,7 +113,7 @@ struct item {
>  };
>  
>  #define HASHSZ 256
> -static struct item *config_hashtab[HASHSZ];
> +static struct item *config_hashtab[HASHSZ], *file_hashtab[HASHSZ];
>  
>  static unsigned int strhash(const char *str, unsigned int sz)
>  {
> @@ -361,6 +361,10 @@ static void parse_dep_file(char *p, const char *target)
>  			 * name, which will be the original one, and ignore any
>  			 * other source names, which will be intermediate
>  			 * temporary files.
> +			 *
> +			 * rustc emits the same dependency list for each
> +			 * emission type. It is enough to list the source name
> +			 * just once.
>  			 */
>  			if (!saw_any_target) {
>  				saw_any_target = true;
> @@ -368,7 +372,8 @@ static void parse_dep_file(char *p, const char *target)
>  				printf("deps_%s := \\\n", target);
>  				need_parse = true;
>  			}
> -		} else if (!is_ignored_file(p, q - p)) {
> +		} else if (!is_ignored_file(p, q - p) &&
> +			   !in_hashtable(p, q - p, file_hashtab)) {
>  			printf("  %s \\\n", p);
>  			need_parse = true;
>  		}
> -- 
> 2.34.1
diff mbox series

Patch

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index b20777b888d7..cc8f6d34c2ca 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -113,7 +113,7 @@  struct item {
 };
 
 #define HASHSZ 256
-static struct item *config_hashtab[HASHSZ];
+static struct item *config_hashtab[HASHSZ], *file_hashtab[HASHSZ];
 
 static unsigned int strhash(const char *str, unsigned int sz)
 {
@@ -361,6 +361,10 @@  static void parse_dep_file(char *p, const char *target)
 			 * name, which will be the original one, and ignore any
 			 * other source names, which will be intermediate
 			 * temporary files.
+			 *
+			 * rustc emits the same dependency list for each
+			 * emission type. It is enough to list the source name
+			 * just once.
 			 */
 			if (!saw_any_target) {
 				saw_any_target = true;
@@ -368,7 +372,8 @@  static void parse_dep_file(char *p, const char *target)
 				printf("deps_%s := \\\n", target);
 				need_parse = true;
 			}
-		} else if (!is_ignored_file(p, q - p)) {
+		} else if (!is_ignored_file(p, q - p) &&
+			   !in_hashtable(p, q - p, file_hashtab)) {
 			printf("  %s \\\n", p);
 			need_parse = true;
 		}