Message ID | 20230509051403.2748545-3-lucas.demarchi@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Fixed-width mask/bit helpers | expand |
Quoting Lucas De Marchi (2023-05-09 02:14:02) >Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >masks for fixed-width types and also the corresponding BIT_U32(), >BIT_U16() and BIT_U8(). > >All of those depend on a new "U" suffix added to the integer constant. >Due to naming clashes it's better to call the macro U32. Since C doesn't >have a proper suffix for short and char types, the U16 and U18 variants >just use U32 with one additional check in the BIT_* macros to make >sure the compiler gives an error when the those types overflow. >The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >as otherwise they would allow an invalid bit to be passed. Hence >implement them in include/linux/bits.h rather than together with >the other BIT* variants. > >The following test file is is used to test this: > > $ cat mask.c > #include <linux/types.h> > #include <linux/bits.h> > > static const u32 a = GENMASK_U32(31, 0); > static const u16 b = GENMASK_U16(15, 0); > static const u8 c = GENMASK_U8(7, 0); > static const u32 x = BIT_U32(31); > static const u16 y = BIT_U16(15); > static const u8 z = BIT_U8(7); > > #if FAIL > static const u32 a2 = GENMASK_U32(32, 0); > static const u16 b2 = GENMASK_U16(16, 0); > static const u8 c2 = GENMASK_U8(8, 0); > static const u32 x2 = BIT_U32(32); > static const u16 y2 = BIT_U16(16); > static const u8 z2 = BIT_U8(8); > #endif > >Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> >--- > include/linux/bits.h | 22 ++++++++++++++++++++++ > include/uapi/linux/const.h | 2 ++ > include/vdso/const.h | 1 + > 3 files changed, 25 insertions(+) > >diff --git a/include/linux/bits.h b/include/linux/bits.h >index 7c0cf5031abe..ff4786c99b8c 100644 >--- a/include/linux/bits.h >+++ b/include/linux/bits.h >@@ -42,4 +42,26 @@ > #define GENMASK_ULL(h, l) \ > (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) > >+#define __GENMASK_U32(h, l) \ >+ (((~U32(0)) - (U32(1) << (l)) + 1) & \ >+ (~U32(0) >> (32 - 1 - (h)))) >+#define GENMASK_U32(h, l) \ >+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) >+ >+#define __GENMASK_U16(h, l) \ >+ ((U32(0xffff) - (U32(1) << (l)) + 1) & \ >+ (U32(0xffff) >> (16 - 1 - (h)))) >+#define GENMASK_U16(h, l) \ >+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) >+ >+#define __GENMASK_U8(h, l) \ >+ (((U32(0xff)) - (U32(1) << (l)) + 1) & \ >+ (U32(0xff) >> (8 - 1 - (h)))) >+#define GENMASK_U8(h, l) \ >+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) I wonder if we should use BIT_U* variants in the above to ensure the values are valid. If so, we get a nice boundary check and we also can use a single definition for the mask generation: #define __GENMASK_U32(h, l) \ (((~U32(0)) - (U32(1) << (l)) + 1) & \ (~U32(0) >> (32 - 1 - (h)))) #define GENMASK_U32(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U32(h), BIT_U32(l))) #define GENMASK_U16(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U16(h), BIT_U16(l))) #define GENMASK_U8(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U8(h), BIT_U8(l))) >+ >+#define BIT_U32(nr) _BITU32(nr) >+#define BIT_U16(nr) (GENMASK_INPUT_CHECK(16 - 1, nr) + (U32(1) << (nr))) >+#define BIT_U8(nr) (GENMASK_INPUT_CHECK(32 - 1, nr) + (U32(1) << (nr))) Shouldn't this be GENMASK_INPUT_CHECK(8 - 1, nr)? -- Gustavo Sousa >+ > #endif /* __LINUX_BITS_H */ >diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h >index a429381e7ca5..3a4e152520f4 100644 >--- a/include/uapi/linux/const.h >+++ b/include/uapi/linux/const.h >@@ -22,9 +22,11 @@ > #define _AT(T,X) ((T)(X)) > #endif > >+#define _U32(x) (_AC(x, U)) > #define _UL(x) (_AC(x, UL)) > #define _ULL(x) (_AC(x, ULL)) > >+#define _BITU32(x) (_U32(1) << (x)) > #define _BITUL(x) (_UL(1) << (x)) > #define _BITULL(x) (_ULL(1) << (x)) > >diff --git a/include/vdso/const.h b/include/vdso/const.h >index 94b385ad438d..417384a9795b 100644 >--- a/include/vdso/const.h >+++ b/include/vdso/const.h >@@ -4,6 +4,7 @@ > > #include <uapi/linux/const.h> > >+#define U32(x) (_U32(x)) > #define UL(x) (_UL(x)) > #define ULL(x) (_ULL(x)) > >-- >2.40.1 >
On Tue, May 09, 2023 at 11:00:36AM -0300, Gustavo Sousa wrote: >Quoting Lucas De Marchi (2023-05-09 02:14:02) >>Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >>masks for fixed-width types and also the corresponding BIT_U32(), >>BIT_U16() and BIT_U8(). >> >>All of those depend on a new "U" suffix added to the integer constant. >>Due to naming clashes it's better to call the macro U32. Since C doesn't >>have a proper suffix for short and char types, the U16 and U18 variants >>just use U32 with one additional check in the BIT_* macros to make >>sure the compiler gives an error when the those types overflow. >>The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >>as otherwise they would allow an invalid bit to be passed. Hence >>implement them in include/linux/bits.h rather than together with >>the other BIT* variants. >> >>The following test file is is used to test this: >> >> $ cat mask.c >> #include <linux/types.h> >> #include <linux/bits.h> >> >> static const u32 a = GENMASK_U32(31, 0); >> static const u16 b = GENMASK_U16(15, 0); >> static const u8 c = GENMASK_U8(7, 0); >> static const u32 x = BIT_U32(31); >> static const u16 y = BIT_U16(15); >> static const u8 z = BIT_U8(7); >> >> #if FAIL >> static const u32 a2 = GENMASK_U32(32, 0); >> static const u16 b2 = GENMASK_U16(16, 0); >> static const u8 c2 = GENMASK_U8(8, 0); >> static const u32 x2 = BIT_U32(32); >> static const u16 y2 = BIT_U16(16); >> static const u8 z2 = BIT_U8(8); >> #endif >> >>Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> >>--- >> include/linux/bits.h | 22 ++++++++++++++++++++++ >> include/uapi/linux/const.h | 2 ++ >> include/vdso/const.h | 1 + >> 3 files changed, 25 insertions(+) >> >>diff --git a/include/linux/bits.h b/include/linux/bits.h >>index 7c0cf5031abe..ff4786c99b8c 100644 >>--- a/include/linux/bits.h >>+++ b/include/linux/bits.h >>@@ -42,4 +42,26 @@ >> #define GENMASK_ULL(h, l) \ >> (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) >> >>+#define __GENMASK_U32(h, l) \ >>+ (((~U32(0)) - (U32(1) << (l)) + 1) & \ >>+ (~U32(0) >> (32 - 1 - (h)))) >>+#define GENMASK_U32(h, l) \ >>+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) >>+ >>+#define __GENMASK_U16(h, l) \ >>+ ((U32(0xffff) - (U32(1) << (l)) + 1) & \ >>+ (U32(0xffff) >> (16 - 1 - (h)))) >>+#define GENMASK_U16(h, l) \ >>+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) >>+ >>+#define __GENMASK_U8(h, l) \ >>+ (((U32(0xff)) - (U32(1) << (l)) + 1) & \ >>+ (U32(0xff) >> (8 - 1 - (h)))) >>+#define GENMASK_U8(h, l) \ >>+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) > >I wonder if we should use BIT_U* variants in the above to ensure the values are >valid. If so, we get a nice boundary check and we also can use a single >definition for the mask generation: > > #define __GENMASK_U32(h, l) \ > (((~U32(0)) - (U32(1) << (l)) + 1) & \ > (~U32(0) >> (32 - 1 - (h)))) the boundary for h and l are already covered here because (32 - 1 - (h)) would lead to a negative value if h >= 32. Similar reason for l Doing ~U32(0) didn't work for me as it wouldn't catch the invalid values due to expanding to U32_MAX > #define GENMASK_U32(h, l) \ > (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U32(h), BIT_U32(l))) ^^^^ that doesn't really work as BIT_U32(h) would expand here, creating the equivalent of ~U32(0) >> (32 - 1 - (BIT_U32(h))), which is not what we want > #define GENMASK_U16(h, l) \ > (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U16(h), BIT_U16(l))) > #define GENMASK_U8(h, l) \ > (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(BIT_U8(h), BIT_U8(l))) > >>+ >>+#define BIT_U32(nr) _BITU32(nr) >>+#define BIT_U16(nr) (GENMASK_INPUT_CHECK(16 - 1, nr) + (U32(1) << (nr))) >>+#define BIT_U8(nr) (GENMASK_INPUT_CHECK(32 - 1, nr) + (U32(1) << (nr))) > >Shouldn't this be GENMASK_INPUT_CHECK(8 - 1, nr)? ugh, good catch. Thanks I will think if I can come up with something that reuses a single __GENMASK_U(). Meanwhile I improved my negative tests to cover more cases. Lucas De Marchi > >-- >Gustavo Sousa > >>+ >> #endif /* __LINUX_BITS_H */ >>diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h >>index a429381e7ca5..3a4e152520f4 100644 >>--- a/include/uapi/linux/const.h >>+++ b/include/uapi/linux/const.h >>@@ -22,9 +22,11 @@ >> #define _AT(T,X) ((T)(X)) >> #endif >> >>+#define _U32(x) (_AC(x, U)) >> #define _UL(x) (_AC(x, UL)) >> #define _ULL(x) (_AC(x, ULL)) >> >>+#define _BITU32(x) (_U32(1) << (x)) >> #define _BITUL(x) (_UL(1) << (x)) >> #define _BITULL(x) (_ULL(1) << (x)) >> >>diff --git a/include/vdso/const.h b/include/vdso/const.h >>index 94b385ad438d..417384a9795b 100644 >>--- a/include/vdso/const.h >>+++ b/include/vdso/const.h >>@@ -4,6 +4,7 @@ >> >> #include <uapi/linux/const.h> >> >>+#define U32(x) (_U32(x)) >> #define UL(x) (_UL(x)) >> #define ULL(x) (_ULL(x)) >> >>-- >>2.40.1 >>
Hi Lucas, kernel test robot noticed the following build errors: [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on drm-intel/for-linux-next-fixes drm-tip/drm-tip linus/master v6.4-rc1 next-20230510] [cannot apply to drm-misc/drm-misc-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Lucas-De-Marchi/drm-amd-Remove-wrapper-macros-over-get_u-32-16-8/20230509-131544 base: git://anongit.freedesktop.org/drm-intel for-linux-next patch link: https://lore.kernel.org/r/20230509051403.2748545-3-lucas.demarchi%40intel.com patch subject: [PATCH 2/3] linux/bits.h: Add fixed-width GENMASK and BIT macros config: arm64-randconfig-r021-20230509 (https://download.01.org/0day-ci/archive/20230510/202305102048.2O5u4Wia-lkp@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project b0fb98227c90adf2536c9ad644a74d5e92961111) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install arm64 cross compiling tool for clang build # apt-get install binutils-aarch64-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/dc308f14f76fa2d6c1698a701dfbe0f1b247e6bd git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Lucas-De-Marchi/drm-amd-Remove-wrapper-macros-over-get_u-32-16-8/20230509-131544 git checkout dc308f14f76fa2d6c1698a701dfbe0f1b247e6bd # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash lib/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Link: https://lore.kernel.org/oe-kbuild-all/202305102048.2O5u4Wia-lkp@intel.com/ All errors (new ones prefixed by >>): >> lib/zstd/compress/zstd_opt.c:785:9: error: type specifier missing, defaults to 'int'; ISO C99 and later do not support implicit int [-Wimplicit-int] typedef U32 (*ZSTD_getAllMatchesFn)( ~~~~~~~ ^ int include/vdso/const.h:7:18: note: expanded from macro 'U32' #define U32(x) (_U32(x)) ^ include/uapi/linux/const.h:25:19: note: expanded from macro '_U32' #define _U32(x) (_AC(x, U)) ^ include/uapi/linux/const.h:21:18: note: expanded from macro '_AC' #define _AC(X,Y) __AC(X,Y) ^ include/uapi/linux/const.h:20:20: note: expanded from macro '__AC' #define __AC(X,Y) (X##Y) ^ <scratch space>:178:1: note: expanded from here ZSTD_getAllMatchesFnU ^ >> lib/zstd/compress/zstd_opt.c:851:8: error: unknown type name 'ZSTD_getAllMatchesFn'; did you mean 'ZSTD_getAllMatchesFnU'? static ZSTD_getAllMatchesFn ^~~~~~~~~~~~~~~~~~~~ ZSTD_getAllMatchesFnU lib/zstd/compress/zstd_opt.c:785:9: note: 'ZSTD_getAllMatchesFnU' declared here typedef U32 (*ZSTD_getAllMatchesFn)( ^ include/vdso/const.h:7:18: note: expanded from macro 'U32' #define U32(x) (_U32(x)) ^ include/uapi/linux/const.h:25:19: note: expanded from macro '_U32' #define _U32(x) (_AC(x, U)) ^ include/uapi/linux/const.h:21:18: note: expanded from macro '_AC' #define _AC(X,Y) __AC(X,Y) ^ include/uapi/linux/const.h:20:20: note: expanded from macro '__AC' #define __AC(X,Y) (X##Y) ^ <scratch space>:178:1: note: expanded from here ZSTD_getAllMatchesFnU ^ >> lib/zstd/compress/zstd_opt.c:854:5: error: use of undeclared identifier 'ZSTD_getAllMatchesFn' ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { ^ >> lib/zstd/compress/zstd_opt.c:862:12: error: use of undeclared identifier 'getAllMatchesFns' return getAllMatchesFns[(int)dictMode][mls - 3]; ^ lib/zstd/compress/zstd_opt.c:1054:5: error: unknown type name 'ZSTD_getAllMatchesFn'; did you mean 'ZSTD_getAllMatchesFnU'? ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); ^~~~~~~~~~~~~~~~~~~~ ZSTD_getAllMatchesFnU lib/zstd/compress/zstd_opt.c:785:9: note: 'ZSTD_getAllMatchesFnU' declared here typedef U32 (*ZSTD_getAllMatchesFn)( ^ include/vdso/const.h:7:18: note: expanded from macro 'U32' #define U32(x) (_U32(x)) ^ include/uapi/linux/const.h:25:19: note: expanded from macro '_U32' #define _U32(x) (_AC(x, U)) ^ include/uapi/linux/const.h:21:18: note: expanded from macro '_AC' #define _AC(X,Y) __AC(X,Y) ^ include/uapi/linux/const.h:20:20: note: expanded from macro '__AC' #define __AC(X,Y) (X##Y) ^ <scratch space>:178:1: note: expanded from here ZSTD_getAllMatchesFnU ^ 5 errors generated. vim +/int +785 lib/zstd/compress/zstd_opt.c e0c1b49f5b674c Nick Terrell 2020-09-11 784 2aa14b1ab2c41a Nick Terrell 2022-10-17 @785 typedef U32 (*ZSTD_getAllMatchesFn)( 2aa14b1ab2c41a Nick Terrell 2022-10-17 786 ZSTD_match_t*, 2aa14b1ab2c41a Nick Terrell 2022-10-17 787 ZSTD_matchState_t*, 2aa14b1ab2c41a Nick Terrell 2022-10-17 788 U32*, 2aa14b1ab2c41a Nick Terrell 2022-10-17 789 const BYTE*, 2aa14b1ab2c41a Nick Terrell 2022-10-17 790 const BYTE*, 2aa14b1ab2c41a Nick Terrell 2022-10-17 791 const U32 rep[ZSTD_REP_NUM], 2aa14b1ab2c41a Nick Terrell 2022-10-17 792 U32 const ll0, 2aa14b1ab2c41a Nick Terrell 2022-10-17 793 U32 const lengthToBeat); e0c1b49f5b674c Nick Terrell 2020-09-11 794 2aa14b1ab2c41a Nick Terrell 2022-10-17 795 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( 2aa14b1ab2c41a Nick Terrell 2022-10-17 796 ZSTD_match_t* matches, e0c1b49f5b674c Nick Terrell 2020-09-11 797 ZSTD_matchState_t* ms, e0c1b49f5b674c Nick Terrell 2020-09-11 798 U32* nextToUpdate3, 2aa14b1ab2c41a Nick Terrell 2022-10-17 799 const BYTE* ip, 2aa14b1ab2c41a Nick Terrell 2022-10-17 800 const BYTE* const iHighLimit, e0c1b49f5b674c Nick Terrell 2020-09-11 801 const U32 rep[ZSTD_REP_NUM], e0c1b49f5b674c Nick Terrell 2020-09-11 802 U32 const ll0, 2aa14b1ab2c41a Nick Terrell 2022-10-17 803 U32 const lengthToBeat, 2aa14b1ab2c41a Nick Terrell 2022-10-17 804 const ZSTD_dictMode_e dictMode, 2aa14b1ab2c41a Nick Terrell 2022-10-17 805 const U32 mls) e0c1b49f5b674c Nick Terrell 2020-09-11 806 { 2aa14b1ab2c41a Nick Terrell 2022-10-17 807 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); 2aa14b1ab2c41a Nick Terrell 2022-10-17 808 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); 2aa14b1ab2c41a Nick Terrell 2022-10-17 809 if (ip < ms->window.base + ms->nextToUpdate) 2aa14b1ab2c41a Nick Terrell 2022-10-17 810 return 0; /* skipped area */ 2aa14b1ab2c41a Nick Terrell 2022-10-17 811 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); 2aa14b1ab2c41a Nick Terrell 2022-10-17 812 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); 2aa14b1ab2c41a Nick Terrell 2022-10-17 813 } 2aa14b1ab2c41a Nick Terrell 2022-10-17 814 2aa14b1ab2c41a Nick Terrell 2022-10-17 815 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls 2aa14b1ab2c41a Nick Terrell 2022-10-17 816 2aa14b1ab2c41a Nick Terrell 2022-10-17 817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 818 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 819 ZSTD_match_t* matches, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 820 ZSTD_matchState_t* ms, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 821 U32* nextToUpdate3, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 822 const BYTE* ip, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 823 const BYTE* const iHighLimit, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 824 const U32 rep[ZSTD_REP_NUM], \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 825 U32 const ll0, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 826 U32 const lengthToBeat) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 827 { \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 828 return ZSTD_btGetAllMatches_internal( \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 829 matches, ms, nextToUpdate3, ip, iHighLimit, \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 830 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 831 } 2aa14b1ab2c41a Nick Terrell 2022-10-17 832 2aa14b1ab2c41a Nick Terrell 2022-10-17 833 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 834 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 835 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 836 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 837 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) 2aa14b1ab2c41a Nick Terrell 2022-10-17 838 2aa14b1ab2c41a Nick Terrell 2022-10-17 839 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) 2aa14b1ab2c41a Nick Terrell 2022-10-17 840 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) 2aa14b1ab2c41a Nick Terrell 2022-10-17 841 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState) 2aa14b1ab2c41a Nick Terrell 2022-10-17 842 2aa14b1ab2c41a Nick Terrell 2022-10-17 843 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 844 { \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 845 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 846 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 847 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 848 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \ 2aa14b1ab2c41a Nick Terrell 2022-10-17 849 } 2aa14b1ab2c41a Nick Terrell 2022-10-17 850 2aa14b1ab2c41a Nick Terrell 2022-10-17 @851 static ZSTD_getAllMatchesFn 2aa14b1ab2c41a Nick Terrell 2022-10-17 852 ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) e0c1b49f5b674c Nick Terrell 2020-09-11 853 { 2aa14b1ab2c41a Nick Terrell 2022-10-17 @854 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { 2aa14b1ab2c41a Nick Terrell 2022-10-17 855 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), 2aa14b1ab2c41a Nick Terrell 2022-10-17 856 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict), 2aa14b1ab2c41a Nick Terrell 2022-10-17 857 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState) 2aa14b1ab2c41a Nick Terrell 2022-10-17 858 }; 2aa14b1ab2c41a Nick Terrell 2022-10-17 859 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6); 2aa14b1ab2c41a Nick Terrell 2022-10-17 860 assert((U32)dictMode < 3); 2aa14b1ab2c41a Nick Terrell 2022-10-17 861 assert(mls - 3 < 4); 2aa14b1ab2c41a Nick Terrell 2022-10-17 @862 return getAllMatchesFns[(int)dictMode][mls - 3]; e0c1b49f5b674c Nick Terrell 2020-09-11 863 } e0c1b49f5b674c Nick Terrell 2020-09-11 864
On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > masks for fixed-width types and also the corresponding BIT_U32(), > BIT_U16() and BIT_U8(). Why? > All of those depend on a new "U" suffix added to the integer constant. > Due to naming clashes it's better to call the macro U32. Since C doesn't > have a proper suffix for short and char types, the U16 and U18 variants > just use U32 with one additional check in the BIT_* macros to make > sure the compiler gives an error when the those types overflow. > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), > as otherwise they would allow an invalid bit to be passed. Hence > implement them in include/linux/bits.h rather than together with > the other BIT* variants. So, we have _Generic() in case you still wish to implement this.
On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> masks for fixed-width types and also the corresponding BIT_U32(), >> BIT_U16() and BIT_U8(). > > Why? The main reason is that GENMASK() and BIT() size varies for 32/64 bit builds. BR, Jani. > >> All of those depend on a new "U" suffix added to the integer constant. >> Due to naming clashes it's better to call the macro U32. Since C doesn't >> have a proper suffix for short and char types, the U16 and U18 variants >> just use U32 with one additional check in the BIT_* macros to make >> sure the compiler gives an error when the those types overflow. >> The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> as otherwise they would allow an invalid bit to be passed. Hence >> implement them in include/linux/bits.h rather than together with >> the other BIT* variants. > > So, we have _Generic() in case you still wish to implement this.
On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: > On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > >> masks for fixed-width types and also the corresponding BIT_U32(), > >> BIT_U16() and BIT_U8(). > > > > Why? > > The main reason is that GENMASK() and BIT() size varies for 32/64 bit > builds. When needed GENMASK_ULL() can be used (with respective castings perhaps) and BIT_ULL(), no?
On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> >> masks for fixed-width types and also the corresponding BIT_U32(), >> >> BIT_U16() and BIT_U8(). >> > >> > Why? >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit >> builds. > > When needed GENMASK_ULL() can be used (with respective castings perhaps) > and BIT_ULL(), no? How does that help with making them the same 32-bit size on both 32 and 64 bit builds? BR, Jani.
On Fri, May 12, 2023 at 02:14:19PM +0300, Andy Shevchenko wrote: >On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> masks for fixed-width types and also the corresponding BIT_U32(), >> BIT_U16() and BIT_U8(). > >Why? to create the masks/values for device registers that are of a certain width, preventing mistakes like: #define REG1 0x10 #define REG1_ENABLE BIT(17) #define REG1_FOO GENMASK(16, 15); register_write(REG1_ENABLE, REG1); ... if REG1 is a 16bit register for example. There were mistakes in the past in the i915 source leading to the creation of the REG_* variants on top of normal GENMASK/BIT (see last patch and commit 09b434d4f6d2 ("drm/i915: introduce REG_BIT() and REG_GENMASK() to define register contents") We are preparing another driver (xe), still to be merged but already open (https://gitlab.freedesktop.org/drm/xe/kernel), that has similar requirements. > >> All of those depend on a new "U" suffix added to the integer constant. >> Due to naming clashes it's better to call the macro U32. Since C doesn't >> have a proper suffix for short and char types, the U16 and U18 variants >> just use U32 with one additional check in the BIT_* macros to make >> sure the compiler gives an error when the those types overflow. >> The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> as otherwise they would allow an invalid bit to be passed. Hence >> implement them in include/linux/bits.h rather than together with >> the other BIT* variants. > >So, we have _Generic() in case you still wish to implement this. humn... how would a _Generic() help here? The input is 1 or 2 integer literals (h and l) so the compiler can check it is correct at build time. See example above. Lucas De Marchi
On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: > On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: > >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > >> >> masks for fixed-width types and also the corresponding BIT_U32(), > >> >> BIT_U16() and BIT_U8(). > >> > > >> > Why? > >> > >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit > >> builds. > > > > When needed GENMASK_ULL() can be used (with respective castings perhaps) > > and BIT_ULL(), no? > > How does that help with making them the same 32-bit size on both 32 and > 64 bit builds? u32 x = GENMASK(); u64 y = GENMASK_ULL(); No? Then use in your code either x or y. Note that I assume that the parameters to GENMASK*() are built-time constants. Is it the case for you?
On Fri, May 12, 2023 at 09:29:23AM -0700, Lucas De Marchi wrote: > On Fri, May 12, 2023 at 02:14:19PM +0300, Andy Shevchenko wrote: > > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > > > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > > > masks for fixed-width types and also the corresponding BIT_U32(), > > > BIT_U16() and BIT_U8(). > > > > Why? > > to create the masks/values for device registers that are > of a certain width, preventing mistakes like: > > #define REG1 0x10 > #define REG1_ENABLE BIT(17) > #define REG1_FOO GENMASK(16, 15); > > register_write(REG1_ENABLE, REG1); > > > ... if REG1 is a 16bit register for example. There were mistakes in the > past in the i915 source leading to the creation of the REG_* variants on > top of normal GENMASK/BIT (see last patch and commit 09b434d4f6d2 > ("drm/i915: introduce REG_BIT() and REG_GENMASK() to define register > contents") Doesn't it look like something for bitfield.h candidate? If your definition doesn't fit the given mask, bail out.
On Thu, 15 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> >> >> masks for fixed-width types and also the corresponding BIT_U32(), >> >> >> BIT_U16() and BIT_U8(). >> >> > >> >> > Why? >> >> >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit >> >> builds. >> > >> > When needed GENMASK_ULL() can be used (with respective castings perhaps) >> > and BIT_ULL(), no? >> >> How does that help with making them the same 32-bit size on both 32 and >> 64 bit builds? > > u32 x = GENMASK(); > u64 y = GENMASK_ULL(); > > No? Then use in your code either x or y. Note that I assume that the parameters > to GENMASK*() are built-time constants. Is it the case for you? What's wrong with wanting to define macros with specific size, depending on e.g. hardware registers instead of build size? What would you use for printk format if you wanted to to print GENMASK()? BR, Jani.
On Tue, Jun 20, 2023 at 05:47:34PM +0300, Jani Nikula wrote: > On Thu, 15 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: > >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > >> > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: > >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > >> >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > >> >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > >> >> >> masks for fixed-width types and also the corresponding BIT_U32(), > >> >> >> BIT_U16() and BIT_U8(). > >> >> > > >> >> > Why? > >> >> > >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit > >> >> builds. > >> > > >> > When needed GENMASK_ULL() can be used (with respective castings perhaps) > >> > and BIT_ULL(), no? > >> > >> How does that help with making them the same 32-bit size on both 32 and > >> 64 bit builds? > > > > u32 x = GENMASK(); > > u64 y = GENMASK_ULL(); > > > > No? Then use in your code either x or y. Note that I assume that the parameters > > to GENMASK*() are built-time constants. Is it the case for you? > > What's wrong with wanting to define macros with specific size, depending > on e.g. hardware registers instead of build size? Nothing, but I think the problem is smaller than it's presented. And there are already header for bitfields with a lot of helpers for (similar) cases if not yours. > What would you use for printk format if you wanted to to print > GENMASK()? %lu, no?
On Tue, Jun 20, 2023 at 05:55:19PM +0300, Andy Shevchenko wrote: >On Tue, Jun 20, 2023 at 05:47:34PM +0300, Jani Nikula wrote: >> On Thu, 15 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> >> > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: >> >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> >> >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> >> >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> >> >> >> masks for fixed-width types and also the corresponding BIT_U32(), >> >> >> >> BIT_U16() and BIT_U8(). >> >> >> > >> >> >> > Why? >> >> >> >> >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit >> >> >> builds. >> >> > >> >> > When needed GENMASK_ULL() can be used (with respective castings perhaps) >> >> > and BIT_ULL(), no? >> >> >> >> How does that help with making them the same 32-bit size on both 32 and >> >> 64 bit builds? >> > >> > u32 x = GENMASK(); >> > u64 y = GENMASK_ULL(); >> > >> > No? Then use in your code either x or y. Note that I assume that the parameters >> > to GENMASK*() are built-time constants. Is it the case for you? >> >> What's wrong with wanting to define macros with specific size, depending >> on e.g. hardware registers instead of build size? > >Nothing, but I think the problem is smaller than it's presented. not sure about big/small problem you are talking about. It's a problem for when the *device* register is a 32b fixed width, which is independent from the CPU you are running on. We also have registers that are u16 and u64. Having fixed-width GENMASK and BIT helps avoiding mistakes like below. Just to use one example, the diff below builds fine on my 64b machine, yet it's obviously wrong: $ git diff diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 0b414eae1683..692a0ad9a768 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -261,8 +261,8 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, * No need to save old steering reg value. */ intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, - REG_FIELD_PREP(MTL_MCR_GROUPID, group) | - REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | + FIELD_PREP(MTL_MCR_GROUPID, group) | + FIELD_PREP(MTL_MCR_INSTANCEID, instance) | (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0)); } else if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 718cb2c80f79..c42bc2900c6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -80,8 +80,8 @@ #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) -#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) +#define MTL_MCR_GROUPID GENMASK(32, 8) +#define MTL_MCR_INSTANCEID GENMASK(3, 0) #define IPEIR_I965 _MMIO(0x2064) #define IPEHR_I965 _MMIO(0x2068) If the driver didn't support 32b CPUs, this would even go unnoticed. Lucas De Marchi >And there are already header for bitfields with a lot of helpers >for (similar) cases if not yours. > >> What would you use for printk format if you wanted to to print >> GENMASK()? > >%lu, no? > >-- >With Best Regards, >Andy Shevchenko > >
On Tue, Jun 20, 2023 at 10:25:21AM -0700, Lucas De Marchi wrote: > On Tue, Jun 20, 2023 at 05:55:19PM +0300, Andy Shevchenko wrote: > > On Tue, Jun 20, 2023 at 05:47:34PM +0300, Jani Nikula wrote: > > > On Thu, 15 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > > > On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: > > > >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > > >> > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: > > > >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > > > >> >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > > > >> >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > > > >> >> >> masks for fixed-width types and also the corresponding BIT_U32(), > > > >> >> >> BIT_U16() and BIT_U8(). > > > >> >> > Why? > > > >> >> > > > >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit > > > >> >> builds. > > > >> > > > > >> > When needed GENMASK_ULL() can be used (with respective castings perhaps) > > > >> > and BIT_ULL(), no? > > > >> > > > >> How does that help with making them the same 32-bit size on both 32 and > > > >> 64 bit builds? > > > > > > > > u32 x = GENMASK(); > > > > u64 y = GENMASK_ULL(); > > > > > > > > No? Then use in your code either x or y. Note that I assume that the parameters > > > > to GENMASK*() are built-time constants. Is it the case for you? > > > > > > What's wrong with wanting to define macros with specific size, depending > > > on e.g. hardware registers instead of build size? > > > > Nothing, but I think the problem is smaller than it's presented. > > not sure about big/small problem you are talking about. It's a problem > for when the *device* register is a 32b fixed width, which is > independent from the CPU you are running on. We also have registers that > are u16 and u64. Having fixed-width GENMASK and BIT helps avoiding > mistakes like below. Just to use one example, the diff below builds > fine on my 64b machine, yet it's obviously wrong: > > $ git diff diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c > index 0b414eae1683..692a0ad9a768 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c > @@ -261,8 +261,8 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, > * No need to save old steering reg value. > */ > intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, > - REG_FIELD_PREP(MTL_MCR_GROUPID, group) | > - REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | > + FIELD_PREP(MTL_MCR_GROUPID, group) | > + FIELD_PREP(MTL_MCR_INSTANCEID, instance) | > (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0)); > } else if (GRAPHICS_VER(uncore->i915) >= 11) { > mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > index 718cb2c80f79..c42bc2900c6a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > @@ -80,8 +80,8 @@ > #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) > #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) > #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) > -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) > -#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) > +#define MTL_MCR_GROUPID GENMASK(32, 8) > +#define MTL_MCR_INSTANCEID GENMASK(3, 0) > #define IPEIR_I965 _MMIO(0x2064) > #define IPEHR_I965 _MMIO(0x2068) > > If the driver didn't support 32b CPUs, this would even go unnoticed. So, what does prevent you from using GENMASK_ULL()? Another point, you may teach GENMASK() to issue a warning if hi and/or lo bigger than BITS_PER_LONG. I still don't see the usefulness of that churn. > Lucas De Marchi > > > And there are already header for bitfields with a lot of helpers > > for (similar) cases if not yours. > > > > > What would you use for printk format if you wanted to to print > > > GENMASK()? > > > > %lu, no?
On Tue, Jun 20, 2023 at 08:41:10PM +0300, Andy Shevchenko wrote: >On Tue, Jun 20, 2023 at 10:25:21AM -0700, Lucas De Marchi wrote: >> On Tue, Jun 20, 2023 at 05:55:19PM +0300, Andy Shevchenko wrote: >> > On Tue, Jun 20, 2023 at 05:47:34PM +0300, Jani Nikula wrote: >> > > On Thu, 15 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > > > On Fri, May 12, 2023 at 02:45:19PM +0300, Jani Nikula wrote: >> > > >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > > >> > On Fri, May 12, 2023 at 02:25:18PM +0300, Jani Nikula wrote: >> > > >> >> On Fri, 12 May 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: >> > > >> >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> > > >> >> >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> > > >> >> >> masks for fixed-width types and also the corresponding BIT_U32(), >> > > >> >> >> BIT_U16() and BIT_U8(). > >> > > >> >> > Why? >> > > >> >> >> > > >> >> The main reason is that GENMASK() and BIT() size varies for 32/64 bit >> > > >> >> builds. >> > > >> > >> > > >> > When needed GENMASK_ULL() can be used (with respective castings perhaps) >> > > >> > and BIT_ULL(), no? >> > > >> >> > > >> How does that help with making them the same 32-bit size on both 32 and >> > > >> 64 bit builds? >> > > > >> > > > u32 x = GENMASK(); >> > > > u64 y = GENMASK_ULL(); >> > > > >> > > > No? Then use in your code either x or y. Note that I assume that the parameters >> > > > to GENMASK*() are built-time constants. Is it the case for you? >> > > >> > > What's wrong with wanting to define macros with specific size, depending >> > > on e.g. hardware registers instead of build size? >> > >> > Nothing, but I think the problem is smaller than it's presented. >> >> not sure about big/small problem you are talking about. It's a problem >> for when the *device* register is a 32b fixed width, which is >> independent from the CPU you are running on. We also have registers that >> are u16 and u64. Having fixed-width GENMASK and BIT helps avoiding >> mistakes like below. Just to use one example, the diff below builds >> fine on my 64b machine, yet it's obviously wrong: >> >> $ git diff diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c >> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c >> index 0b414eae1683..692a0ad9a768 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c >> @@ -261,8 +261,8 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt, >> * No need to save old steering reg value. >> */ >> intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, >> - REG_FIELD_PREP(MTL_MCR_GROUPID, group) | >> - REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | >> + FIELD_PREP(MTL_MCR_GROUPID, group) | >> + FIELD_PREP(MTL_MCR_INSTANCEID, instance) | >> (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0)); >> } else if (GRAPHICS_VER(uncore->i915) >= 11) { >> mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h >> index 718cb2c80f79..c42bc2900c6a 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h >> @@ -80,8 +80,8 @@ >> #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) >> #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) >> #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) >> -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) >> -#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) >> +#define MTL_MCR_GROUPID GENMASK(32, 8) >> +#define MTL_MCR_INSTANCEID GENMASK(3, 0) >> #define IPEIR_I965 _MMIO(0x2064) >> #define IPEHR_I965 _MMIO(0x2068) >> >> If the driver didn't support 32b CPUs, this would even go unnoticed. > >So, what does prevent you from using GENMASK_ULL()? nothing is preventing me to write the wrong code, which is what we are trying to solve. GENMASK_ULL() would generate the wrong code as that particular register is 32b, not 64b, on the GPU. > >Another point, you may teach GENMASK() to issue a warning if hi and/or lo >bigger than BITS_PER_LONG. Which varies depending on the CPU you are building for, so it misses the point. GENMASK_U32/GENMASK_U16/GENMASK_U8 and BIT counterparts would emit a warning if hi is bigger than _exactly_ 32, 16 or 8, regardless of the CPU you built the code for. Lucas De Marchi > >I still don't see the usefulness of that churn. > >> Lucas De Marchi >> >> > And there are already header for bitfields with a lot of helpers >> > for (similar) cases if not yours. >> > >> > > What would you use for printk format if you wanted to to print >> > > GENMASK()? >> > >> > %lu, no? > >-- >With Best Regards, >Andy Shevchenko > >
On Tue, 20 Jun 2023, Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote: > So, what does prevent you from using GENMASK_ULL()? > > Another point, you may teach GENMASK() to issue a warning if hi and/or lo > bigger than BITS_PER_LONG. What good does that do if you want the warning for a fixed size different from unsigned long or long long? Worse, sizeof(long) depends on arch, while the GENMASK you want depends on the use case. > I still don't see the usefulness of that churn. This thread is turning into a prime example of why drivers and subsystems reinvent their own wheels instead of trying to get generally useful stuff merged in kernel headers. :p BR, Jani.
Hi Lucas, all! (Thanks, Andy, for pointing to this thread.) On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > masks for fixed-width types and also the corresponding BIT_U32(), > BIT_U16() and BIT_U8(). Can you split BIT() and GENMASK() material to separate patches? > All of those depend on a new "U" suffix added to the integer constant. > Due to naming clashes it's better to call the macro U32. Since C doesn't > have a proper suffix for short and char types, the U16 and U18 variants > just use U32 with one additional check in the BIT_* macros to make > sure the compiler gives an error when the those types overflow. I feel like I don't understand the sentence... > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), > as otherwise they would allow an invalid bit to be passed. Hence > implement them in include/linux/bits.h rather than together with > the other BIT* variants. I don't think it's a good way to go because BIT() belongs to a more basic level than GENMASK(). Not mentioning possible header dependency issues. If you need to test against tighter numeric region, I'd suggest to do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h directly. Something like: #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) > The following test file is is used to test this: > > $ cat mask.c > #include <linux/types.h> > #include <linux/bits.h> > > static const u32 a = GENMASK_U32(31, 0); > static const u16 b = GENMASK_U16(15, 0); > static const u8 c = GENMASK_U8(7, 0); > static const u32 x = BIT_U32(31); > static const u16 y = BIT_U16(15); > static const u8 z = BIT_U8(7); > > #if FAIL > static const u32 a2 = GENMASK_U32(32, 0); > static const u16 b2 = GENMASK_U16(16, 0); > static const u8 c2 = GENMASK_U8(8, 0); > static const u32 x2 = BIT_U32(32); > static const u16 y2 = BIT_U16(16); > static const u8 z2 = BIT_U8(8); > #endif > > Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> > --- > include/linux/bits.h | 22 ++++++++++++++++++++++ > include/uapi/linux/const.h | 2 ++ > include/vdso/const.h | 1 + > 3 files changed, 25 insertions(+) > > diff --git a/include/linux/bits.h b/include/linux/bits.h > index 7c0cf5031abe..ff4786c99b8c 100644 > --- a/include/linux/bits.h > +++ b/include/linux/bits.h > @@ -42,4 +42,26 @@ > #define GENMASK_ULL(h, l) \ > (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) > > +#define __GENMASK_U32(h, l) \ > + (((~U32(0)) - (U32(1) << (l)) + 1) & \ > + (~U32(0) >> (32 - 1 - (h)))) > +#define GENMASK_U32(h, l) \ > + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) > + > +#define __GENMASK_U16(h, l) \ > + ((U32(0xffff) - (U32(1) << (l)) + 1) & \ > + (U32(0xffff) >> (16 - 1 - (h)))) > +#define GENMASK_U16(h, l) \ > + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) > + > +#define __GENMASK_U8(h, l) \ > + (((U32(0xff)) - (U32(1) << (l)) + 1) & \ > + (U32(0xff) >> (8 - 1 - (h)))) > +#define GENMASK_U8(h, l) \ > + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) [...] I see nothing wrong with fixed-wight versions of GENMASK if it helps people to write safer code. Can you please in commit message mention the exact patch(es) that added a bug related to GENMASK() misuse? It would be easier to advocate the purpose of new API with that in mind. Regarding implementation - we should avoid copy-pasting in cases like this. Below is the patch that I boot-tested for x86_64 and compile-tested for arm64. It looks less opencoded, and maybe Andy will be less skeptical about this approach because of less maintenance burden. Please take it if you like for v2. Thanks, Yury From 39c5b35075df67e7d88644470ca78a3486367c02 Mon Sep 17 00:00:00 2001 From: Yury Norov <yury.norov@gmail.com> Date: Wed, 21 Jun 2023 15:27:29 -0700 Subject: [PATCH] bits: introduce fixed-type genmasks Generalize __GENMASK() to support different types, and implement fixed-types versions of GENMASK() based on it. Signed-off-by: Yury Norov <yury.norov@gmail.com> --- include/linux/bitops.h | 1 - include/linux/bits.h | 22 ++++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2ba557e067fe..1db50c69cfdb 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -15,7 +15,6 @@ # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) #endif -#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe..cb94128171b2 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -6,6 +6,8 @@ #include <vdso/bits.h> #include <asm/bitsperlong.h> +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) + #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) @@ -30,16 +32,16 @@ #define GENMASK_INPUT_CHECK(h, l) 0 #endif -#define __GENMASK(h, l) \ - (((~UL(0)) - (UL(1) << (l)) + 1) & \ - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) +#define __GENMASK(t, h, l) \ + (GENMASK_INPUT_CHECK(h, l) + \ + (((t)~0ULL - ((t)(1) << (l)) + 1) & \ + ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) -#define __GENMASK_ULL(h, l) \ - (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ - (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#define GENMASK(h, l) __GENMASK(unsigned long, h, l) +#define GENMASK_ULL(h, l) __GENMASK(unsigned long long, h, l) +#define GENMASK_U8(h, l) __GENMASK(u8, h, l) +#define GENMASK_U16(h, l) __GENMASK(u16, h, l) +#define GENMASK_U32(h, l) __GENMASK(u32, h, l) +#define GENMASK_U64(h, l) __GENMASK(u64, h, l) #endif /* __LINUX_BITS_H */
On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: >Hi Lucas, all! > >(Thanks, Andy, for pointing to this thread.) > >On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> masks for fixed-width types and also the corresponding BIT_U32(), >> BIT_U16() and BIT_U8(). > >Can you split BIT() and GENMASK() material to separate patches? > >> All of those depend on a new "U" suffix added to the integer constant. >> Due to naming clashes it's better to call the macro U32. Since C doesn't >> have a proper suffix for short and char types, the U16 and U18 variants >> just use U32 with one additional check in the BIT_* macros to make >> sure the compiler gives an error when the those types overflow. > >I feel like I don't understand the sentence... maybe it was a digression of the integer constants > >> The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> as otherwise they would allow an invalid bit to be passed. Hence >> implement them in include/linux/bits.h rather than together with >> the other BIT* variants. > >I don't think it's a good way to go because BIT() belongs to a more basic >level than GENMASK(). Not mentioning possible header dependency issues. >If you need to test against tighter numeric region, I'd suggest to >do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h >directly. Something like: > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) > >> The following test file is is used to test this: >> >> $ cat mask.c >> #include <linux/types.h> >> #include <linux/bits.h> >> >> static const u32 a = GENMASK_U32(31, 0); >> static const u16 b = GENMASK_U16(15, 0); >> static const u8 c = GENMASK_U8(7, 0); >> static const u32 x = BIT_U32(31); >> static const u16 y = BIT_U16(15); >> static const u8 z = BIT_U8(7); >> >> #if FAIL >> static const u32 a2 = GENMASK_U32(32, 0); >> static const u16 b2 = GENMASK_U16(16, 0); >> static const u8 c2 = GENMASK_U8(8, 0); >> static const u32 x2 = BIT_U32(32); >> static const u16 y2 = BIT_U16(16); >> static const u8 z2 = BIT_U8(8); >> #endif >> >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> >> --- >> include/linux/bits.h | 22 ++++++++++++++++++++++ >> include/uapi/linux/const.h | 2 ++ >> include/vdso/const.h | 1 + >> 3 files changed, 25 insertions(+) >> >> diff --git a/include/linux/bits.h b/include/linux/bits.h >> index 7c0cf5031abe..ff4786c99b8c 100644 >> --- a/include/linux/bits.h >> +++ b/include/linux/bits.h >> @@ -42,4 +42,26 @@ >> #define GENMASK_ULL(h, l) \ >> (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) >> >> +#define __GENMASK_U32(h, l) \ >> + (((~U32(0)) - (U32(1) << (l)) + 1) & \ >> + (~U32(0) >> (32 - 1 - (h)))) >> +#define GENMASK_U32(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) >> + >> +#define __GENMASK_U16(h, l) \ >> + ((U32(0xffff) - (U32(1) << (l)) + 1) & \ >> + (U32(0xffff) >> (16 - 1 - (h)))) >> +#define GENMASK_U16(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) >> + >> +#define __GENMASK_U8(h, l) \ >> + (((U32(0xff)) - (U32(1) << (l)) + 1) & \ >> + (U32(0xff) >> (8 - 1 - (h)))) >> +#define GENMASK_U8(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) > >[...] > >I see nothing wrong with fixed-wight versions of GENMASK if it helps >people to write safer code. Can you please in commit message mention >the exact patch(es) that added a bug related to GENMASK() misuse? It >would be easier to advocate the purpose of new API with that in mind. > >Regarding implementation - we should avoid copy-pasting in cases >like this. Below is the patch that I boot-tested for x86_64 and >compile-tested for arm64. > >It looks less opencoded, and maybe Andy will be less skeptical about >this approach because of less maintenance burden. Please take it if >you like for v2. > >Thanks, >Yury > >From 39c5b35075df67e7d88644470ca78a3486367c02 Mon Sep 17 00:00:00 2001 >From: Yury Norov <yury.norov@gmail.com> >Date: Wed, 21 Jun 2023 15:27:29 -0700 >Subject: [PATCH] bits: introduce fixed-type genmasks > >Generalize __GENMASK() to support different types, and implement >fixed-types versions of GENMASK() based on it. > >Signed-off-by: Yury Norov <yury.norov@gmail.com> >--- > include/linux/bitops.h | 1 - > include/linux/bits.h | 22 ++++++++++++---------- > 2 files changed, 12 insertions(+), 11 deletions(-) > >diff --git a/include/linux/bitops.h b/include/linux/bitops.h >index 2ba557e067fe..1db50c69cfdb 100644 >--- a/include/linux/bitops.h >+++ b/include/linux/bitops.h >@@ -15,7 +15,6 @@ > # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) > #endif > >-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) > #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) > #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) > #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) >diff --git a/include/linux/bits.h b/include/linux/bits.h >index 7c0cf5031abe..cb94128171b2 100644 >--- a/include/linux/bits.h >+++ b/include/linux/bits.h >@@ -6,6 +6,8 @@ > #include <vdso/bits.h> > #include <asm/bitsperlong.h> > >+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) >+ > #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) > #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) > #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) >@@ -30,16 +32,16 @@ > #define GENMASK_INPUT_CHECK(h, l) 0 > #endif > >-#define __GENMASK(h, l) \ >- (((~UL(0)) - (UL(1) << (l)) + 1) & \ >- (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) >-#define GENMASK(h, l) \ >- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) >+#define __GENMASK(t, h, l) \ >+ (GENMASK_INPUT_CHECK(h, l) + \ >+ (((t)~0ULL - ((t)(1) << (l)) + 1) & \ >+ ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) yeah... forcing the use of ull and then casting to the type is simpler and does the job. Checked that it does not break the build if h is greater than the type and it works ../include/linux/bits.h:40:20: error: right shift count >= width of type [-Werror=shift-count-overflow] 40 | ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) | ^~ However this new version does increase the size. Using i915 module to test: $ size build64/drivers/gpu/drm/i915/i915.ko* text data bss dec hex filename 4355676 213473 7048 4576197 45d3c5 build64/drivers/gpu/drm/i915/i915.ko 4361052 213505 7048 4581605 45e8e5 build64/drivers/gpu/drm/i915/i915.ko.new Lucas De Marchi > >-#define __GENMASK_ULL(h, l) \ >- (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ >- (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) >-#define GENMASK_ULL(h, l) \ >- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) >+#define GENMASK(h, l) __GENMASK(unsigned long, h, l) >+#define GENMASK_ULL(h, l) __GENMASK(unsigned long long, h, l) >+#define GENMASK_U8(h, l) __GENMASK(u8, h, l) >+#define GENMASK_U16(h, l) __GENMASK(u16, h, l) >+#define GENMASK_U32(h, l) __GENMASK(u32, h, l) >+#define GENMASK_U64(h, l) __GENMASK(u64, h, l) > > #endif /* __LINUX_BITS_H */ >-- >2.39.2 >
+ Rasmus Villemoes <linux@rasmusvillemoes.dk> > > -#define __GENMASK(h, l) \ > > - (((~UL(0)) - (UL(1) << (l)) + 1) & \ > > - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) > > -#define GENMASK(h, l) \ > > - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) > > +#define __GENMASK(t, h, l) \ > > + (GENMASK_INPUT_CHECK(h, l) + \ > > + (((t)~0ULL - ((t)(1) << (l)) + 1) & \ > > + ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) > > yeah... forcing the use of ull and then casting to the type is simpler > and does the job. Checked that it does not break the build if h is > greater than the type and it works > > ../include/linux/bits.h:40:20: error: right shift count >= width of type [-Werror=shift-count-overflow] > 40 | ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) > | ^~ > > However this new version does increase the size. Using i915 module > to test: > > $ size build64/drivers/gpu/drm/i915/i915.ko* > text data bss dec hex filename > 4355676 213473 7048 4576197 45d3c5 build64/drivers/gpu/drm/i915/i915.ko > 4361052 213505 7048 4581605 45e8e5 build64/drivers/gpu/drm/i915/i915.ko.new It sounds weird because all that should anyways boil down at compile time... I enabled DRM_I915 in config and ran bloat-o-meter against today's master, and I don't see that much difference. $ size vmlinux vmlinux.new text data bss dec hex filename 44978613 23962202 3026948 71967763 44a2413 vmlinux 44978653 23966298 3026948 71971899 44a343b vmlinux.new $ scripts/bloat-o-meter vmlinux vmlinux.new add/remove: 0/0 grow/shrink: 3/2 up/down: 28/-5 (23) Function old new delta kvm_mmu_reset_all_pte_masks 623 639 +16 intel_psr_invalidate 1112 1119 +7 intel_drrs_activate 624 629 +5 intel_psr_flush 1410 1409 -1 clk_fractional_divider_general_approximation 207 203 -4 Total: Before=35398799, After=35398822, chg +0.00% Can you please check your numbers? Interestingly, the kvm_mmu_reset_all_pte_masks() uses GENMASK_ULL(), which should generate the same code across versions. Maybe it's just a noise? Rasmus, can you please take a look? Thanks, Yury
Hi, Reviving this thread as now with xe driver merged we have 2 users for a fixed-width BIT/GENMASK. On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: >Hi Lucas, all! > >(Thanks, Andy, for pointing to this thread.) > >On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> masks for fixed-width types and also the corresponding BIT_U32(), >> BIT_U16() and BIT_U8(). > >Can you split BIT() and GENMASK() material to separate patches? > >> All of those depend on a new "U" suffix added to the integer constant. >> Due to naming clashes it's better to call the macro U32. Since C doesn't >> have a proper suffix for short and char types, the U16 and U18 variants >> just use U32 with one additional check in the BIT_* macros to make >> sure the compiler gives an error when the those types overflow. > >I feel like I don't understand the sentence... > >> The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> as otherwise they would allow an invalid bit to be passed. Hence >> implement them in include/linux/bits.h rather than together with >> the other BIT* variants. > >I don't think it's a good way to go because BIT() belongs to a more basic >level than GENMASK(). Not mentioning possible header dependency issues. >If you need to test against tighter numeric region, I'd suggest to >do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h >directly. Something like: > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) but then make uapi/linux/const.h include linux/build_bug.h? I was thinking about leaving BIT() define where it is, and add the fixed-width versions in this header. I was thinking uapi/linux/const.h was more about allowing the U/ULL suffixes for things shared with asm. Lucas De Marchi > >> The following test file is is used to test this: >> >> $ cat mask.c >> #include <linux/types.h> >> #include <linux/bits.h> >> >> static const u32 a = GENMASK_U32(31, 0); >> static const u16 b = GENMASK_U16(15, 0); >> static const u8 c = GENMASK_U8(7, 0); >> static const u32 x = BIT_U32(31); >> static const u16 y = BIT_U16(15); >> static const u8 z = BIT_U8(7); >> >> #if FAIL >> static const u32 a2 = GENMASK_U32(32, 0); >> static const u16 b2 = GENMASK_U16(16, 0); >> static const u8 c2 = GENMASK_U8(8, 0); >> static const u32 x2 = BIT_U32(32); >> static const u16 y2 = BIT_U16(16); >> static const u8 z2 = BIT_U8(8); >> #endif >> >> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> >> --- >> include/linux/bits.h | 22 ++++++++++++++++++++++ >> include/uapi/linux/const.h | 2 ++ >> include/vdso/const.h | 1 + >> 3 files changed, 25 insertions(+) >> >> diff --git a/include/linux/bits.h b/include/linux/bits.h >> index 7c0cf5031abe..ff4786c99b8c 100644 >> --- a/include/linux/bits.h >> +++ b/include/linux/bits.h >> @@ -42,4 +42,26 @@ >> #define GENMASK_ULL(h, l) \ >> (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) >> >> +#define __GENMASK_U32(h, l) \ >> + (((~U32(0)) - (U32(1) << (l)) + 1) & \ >> + (~U32(0) >> (32 - 1 - (h)))) >> +#define GENMASK_U32(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) >> + >> +#define __GENMASK_U16(h, l) \ >> + ((U32(0xffff) - (U32(1) << (l)) + 1) & \ >> + (U32(0xffff) >> (16 - 1 - (h)))) >> +#define GENMASK_U16(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) >> + >> +#define __GENMASK_U8(h, l) \ >> + (((U32(0xff)) - (U32(1) << (l)) + 1) & \ >> + (U32(0xff) >> (8 - 1 - (h)))) >> +#define GENMASK_U8(h, l) \ >> + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) > >[...] > >I see nothing wrong with fixed-wight versions of GENMASK if it helps >people to write safer code. Can you please in commit message mention >the exact patch(es) that added a bug related to GENMASK() misuse? It >would be easier to advocate the purpose of new API with that in mind. > >Regarding implementation - we should avoid copy-pasting in cases >like this. Below is the patch that I boot-tested for x86_64 and >compile-tested for arm64. > >It looks less opencoded, and maybe Andy will be less skeptical about >this approach because of less maintenance burden. Please take it if >you like for v2. > >Thanks, >Yury > >From 39c5b35075df67e7d88644470ca78a3486367c02 Mon Sep 17 00:00:00 2001 >From: Yury Norov <yury.norov@gmail.com> >Date: Wed, 21 Jun 2023 15:27:29 -0700 >Subject: [PATCH] bits: introduce fixed-type genmasks > >Generalize __GENMASK() to support different types, and implement >fixed-types versions of GENMASK() based on it. > >Signed-off-by: Yury Norov <yury.norov@gmail.com> >--- > include/linux/bitops.h | 1 - > include/linux/bits.h | 22 ++++++++++++---------- > 2 files changed, 12 insertions(+), 11 deletions(-) > >diff --git a/include/linux/bitops.h b/include/linux/bitops.h >index 2ba557e067fe..1db50c69cfdb 100644 >--- a/include/linux/bitops.h >+++ b/include/linux/bitops.h >@@ -15,7 +15,6 @@ > # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) > #endif > >-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) > #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) > #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) > #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) >diff --git a/include/linux/bits.h b/include/linux/bits.h >index 7c0cf5031abe..cb94128171b2 100644 >--- a/include/linux/bits.h >+++ b/include/linux/bits.h >@@ -6,6 +6,8 @@ > #include <vdso/bits.h> > #include <asm/bitsperlong.h> > >+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) >+ > #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) > #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) > #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) >@@ -30,16 +32,16 @@ > #define GENMASK_INPUT_CHECK(h, l) 0 > #endif > >-#define __GENMASK(h, l) \ >- (((~UL(0)) - (UL(1) << (l)) + 1) & \ >- (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) >-#define GENMASK(h, l) \ >- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) >+#define __GENMASK(t, h, l) \ >+ (GENMASK_INPUT_CHECK(h, l) + \ >+ (((t)~0ULL - ((t)(1) << (l)) + 1) & \ >+ ((t)~0ULL >> (BITS_PER_TYPE(t) - 1 - (h))))) > >-#define __GENMASK_ULL(h, l) \ >- (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ >- (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) >-#define GENMASK_ULL(h, l) \ >- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) >+#define GENMASK(h, l) __GENMASK(unsigned long, h, l) >+#define GENMASK_ULL(h, l) __GENMASK(unsigned long long, h, l) >+#define GENMASK_U8(h, l) __GENMASK(u8, h, l) >+#define GENMASK_U16(h, l) __GENMASK(u16, h, l) >+#define GENMASK_U32(h, l) __GENMASK(u32, h, l) >+#define GENMASK_U64(h, l) __GENMASK(u64, h, l) > > #endif /* __LINUX_BITS_H */ >-- >2.39.2 >
On Thu, Jan 18, 2024 at 02:42:12PM -0600, Lucas De Marchi wrote: > Hi, > > Reviving this thread as now with xe driver merged we have 2 users for > a fixed-width BIT/GENMASK. Can you point where and why? > On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: > > Hi Lucas, all! > > > > (Thanks, Andy, for pointing to this thread.) > > > > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > > > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > > > masks for fixed-width types and also the corresponding BIT_U32(), > > > BIT_U16() and BIT_U8(). > > > > Can you split BIT() and GENMASK() material to separate patches? > > > > > All of those depend on a new "U" suffix added to the integer constant. > > > Due to naming clashes it's better to call the macro U32. Since C doesn't > > > have a proper suffix for short and char types, the U16 and U18 variants > > > just use U32 with one additional check in the BIT_* macros to make > > > sure the compiler gives an error when the those types overflow. > > > > I feel like I don't understand the sentence... > > > > > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), > > > as otherwise they would allow an invalid bit to be passed. Hence > > > implement them in include/linux/bits.h rather than together with > > > the other BIT* variants. > > > > I don't think it's a good way to go because BIT() belongs to a more basic > > level than GENMASK(). Not mentioning possible header dependency issues. > > If you need to test against tighter numeric region, I'd suggest to > > do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h > > directly. Something like: > > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) > > but then make uapi/linux/const.h include linux/build_bug.h? > I was thinking about leaving BIT() define where it is, and add the > fixed-width versions in this header. I was thinking uapi/linux/const.h > was more about allowing the U/ULL suffixes for things shared with asm. You can't include kernel headers in uapi code. But you can try doing vice-versa: implement or move the pieces you need to share to the uapi/linux/const.h, and use them in the kernel code. In the worst case, you can just implement the macro you need in the uapi header, and make it working that way. Can you confirm that my proposal increases the kernel size? If so, is there any way to fix it? If it doesn't, I'd prefer to use the __GENMASK() approach. Thanks, Yury
On Thu, Jan 18, 2024 at 01:48:43PM -0800, Yury Norov wrote: >On Thu, Jan 18, 2024 at 02:42:12PM -0600, Lucas De Marchi wrote: >> Hi, >> >> Reviving this thread as now with xe driver merged we have 2 users for >> a fixed-width BIT/GENMASK. > >Can you point where and why? See users of REG_GENMASK and REG_BIT in drivers/gpu/drm/i915 and drivers/gpu/drm/xe. I think the register definition in the xe shows it in a good way: drivers/gpu/drm/xe/regs/xe_gt_regs.h The GPU registers are mostly 32-bit wide. We don't want to accidently do something like below (s/30/33/ added for illustration purposes): #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(33) Same thing for GENMASK family of macros and for registers that are 16 or 8 bits. See e.g. drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h > >> On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: >> > Hi Lucas, all! >> > >> > (Thanks, Andy, for pointing to this thread.) >> > >> > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> > > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> > > masks for fixed-width types and also the corresponding BIT_U32(), >> > > BIT_U16() and BIT_U8(). >> > >> > Can you split BIT() and GENMASK() material to separate patches? >> > >> > > All of those depend on a new "U" suffix added to the integer constant. >> > > Due to naming clashes it's better to call the macro U32. Since C doesn't >> > > have a proper suffix for short and char types, the U16 and U18 variants >> > > just use U32 with one additional check in the BIT_* macros to make >> > > sure the compiler gives an error when the those types overflow. >> > >> > I feel like I don't understand the sentence... >> > >> > > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> > > as otherwise they would allow an invalid bit to be passed. Hence >> > > implement them in include/linux/bits.h rather than together with >> > > the other BIT* variants. >> > >> > I don't think it's a good way to go because BIT() belongs to a more basic >> > level than GENMASK(). Not mentioning possible header dependency issues. >> > If you need to test against tighter numeric region, I'd suggest to >> > do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h >> > directly. Something like: >> > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) >> >> but then make uapi/linux/const.h include linux/build_bug.h? >> I was thinking about leaving BIT() define where it is, and add the >> fixed-width versions in this header. I was thinking uapi/linux/const.h >> was more about allowing the U/ULL suffixes for things shared with asm. > >You can't include kernel headers in uapi code. But you can try doing >vice-versa: implement or move the pieces you need to share to the >uapi/linux/const.h, and use them in the kernel code. but in this CONST_GE() should trigger a BUG/static_assert on U8_MAX < x. AFAICS that check can't be on the uapi/ side, so there's nothing much left to change in uapi/linux/const.h. I'd expect drivers to be the primary user of these fixed-width BIT variants, hence the proposal to do in include/linux/bits.h. Ssomething like this WIP/untested diff (on top of your previous patch): diff --git a/include/linux/bits.h b/include/linux/bits.h index cb94128171b2..409cd10f7597 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -24,12 +24,16 @@ #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ __is_constexpr((l) > (h)), (l) > (h), 0))) +#define BIT_INPUT_CHECK(type, b) \ + ((BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __is_constexpr(b), (b) >= BITS_PER_TYPE(type), 0)))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 +#define BIT_INPUT_CHECK(type, b) 0 #endif #define __GENMASK(t, h, l) \ @@ -44,4 +48,9 @@ #define GENMASK_U32(h, l) __GENMASK(u32, h, l) #define GENMASK_U64(h, l) __GENMASK(u64, h, l) +#define BIT_U8(b) (u8)(BIT_INPUT_CHECK(u8, b) + BIT(b)) +#define BIT_U16(b) (u16)(BIT_INPUT_CHECK(u16, b) + BIT(b)) +#define BIT_U32(b) (u32)(BIT_INPUT_CHECK(u32, b) + BIT(b)) +#define BIT_U64(b) (u64)(BIT_INPUT_CHECK(u64, b) + BIT(b)) + #endif /* __LINUX_BITS_H */ > >In the worst case, you can just implement the macro you need in the >uapi header, and make it working that way. > >Can you confirm that my proposal increases the kernel size? If so, is >there any way to fix it? If it doesn't, I'd prefer to use the >__GENMASK() approach. I agree on continuing with your approach. The bloat-o-meter indeed showed almost no difference. `size ....i915.o` on the other hand increased, but then decreased when I replaced our current REG_GENMASK() implementation to reuse the new GENMASK_U*() $ # test-genmask.00: before any change $ # test-genmask.01: after your patch to GENMASK $ # test-genmask.01: after converting drivers/gpu/drm/i915/i915_reg_defs.h to use the new macros $ size build64/drivers/gpu/drm/i915/i915.o-test-genmask.* text data bss dec hex filename 4506628 215083 7168 4728879 48282f build64/drivers/gpu/drm/i915/i915.o-test-genmask.00 4511084 215083 7168 4733335 483997 build64/drivers/gpu/drm/i915/i915.o-test-genmask.01 4493292 215083 7168 4715543 47f417 build64/drivers/gpu/drm/i915/i915.o-test-genmask.02 $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[01] add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-5 (-1) Function old new delta intel_drrs_activate 399 402 +3 intel_psr_invalidate 546 547 +1 intel_psr_flush 880 875 -5 Total: Before=2980530, After=2980529, chg -0.00% $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[12] add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) Function old new delta Total: Before=2980529, After=2980529, chg +0.00% thanks Lucas De Marchi > >Thanks, >Yury
On Thu, Jan 18, 2024 at 05:25:00PM -0600, Lucas De Marchi wrote: > SA2PR11MB4874 > X-OriginatorOrg: intel.com > Status: RO > Content-Length: 6257 > Lines: 150 > > On Thu, Jan 18, 2024 at 01:48:43PM -0800, Yury Norov wrote: > > On Thu, Jan 18, 2024 at 02:42:12PM -0600, Lucas De Marchi wrote: > > > Hi, > > > > > > Reviving this thread as now with xe driver merged we have 2 users for > > > a fixed-width BIT/GENMASK. > > > > Can you point where and why? > > See users of REG_GENMASK and REG_BIT in drivers/gpu/drm/i915 and > drivers/gpu/drm/xe. I think the register definition in the xe shows it > in a good way: > > drivers/gpu/drm/xe/regs/xe_gt_regs.h > > The GPU registers are mostly 32-bit wide. We don't want to accidently do > something like below (s/30/33/ added for illustration purposes): > > #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) > #define DISABLE_D8_D16_COASLESCE REG_BIT(33) > > Same thing for GENMASK family of macros and for registers that are 16 or > 8 bits. See e.g. drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h > > > > > > > On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: > > > > Hi Lucas, all! > > > > > > > > (Thanks, Andy, for pointing to this thread.) > > > > > > > > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: > > > > > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create > > > > > masks for fixed-width types and also the corresponding BIT_U32(), > > > > > BIT_U16() and BIT_U8(). > > > > > > > > Can you split BIT() and GENMASK() material to separate patches? > > > > > > > > > All of those depend on a new "U" suffix added to the integer constant. > > > > > Due to naming clashes it's better to call the macro U32. Since C doesn't > > > > > have a proper suffix for short and char types, the U16 and U18 variants > > > > > just use U32 with one additional check in the BIT_* macros to make > > > > > sure the compiler gives an error when the those types overflow. > > > > > > > > I feel like I don't understand the sentence... > > > > > > > > > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), > > > > > as otherwise they would allow an invalid bit to be passed. Hence > > > > > implement them in include/linux/bits.h rather than together with > > > > > the other BIT* variants. > > > > > > > > I don't think it's a good way to go because BIT() belongs to a more basic > > > > level than GENMASK(). Not mentioning possible header dependency issues. > > > > If you need to test against tighter numeric region, I'd suggest to > > > > do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h > > > > directly. Something like: > > > > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) > > > > > > but then make uapi/linux/const.h include linux/build_bug.h? > > > I was thinking about leaving BIT() define where it is, and add the > > > fixed-width versions in this header. I was thinking uapi/linux/const.h > > > was more about allowing the U/ULL suffixes for things shared with asm. > > > > You can't include kernel headers in uapi code. But you can try doing > > vice-versa: implement or move the pieces you need to share to the > > uapi/linux/const.h, and use them in the kernel code. > > but in this CONST_GE() should trigger a BUG/static_assert > on U8_MAX < x. AFAICS that check can't be on the uapi/ side, > so there's nothing much left to change in uapi/linux/const.h. > > I'd expect drivers to be the primary user of these fixed-width BIT > variants, hence the proposal to do in include/linux/bits.h. > Ssomething like this WIP/untested diff (on top of your previous patch): > > > diff --git a/include/linux/bits.h b/include/linux/bits.h > index cb94128171b2..409cd10f7597 100644 > --- a/include/linux/bits.h > +++ b/include/linux/bits.h > @@ -24,12 +24,16 @@ > #define GENMASK_INPUT_CHECK(h, l) \ > (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > __is_constexpr((l) > (h)), (l) > (h), 0))) > +#define BIT_INPUT_CHECK(type, b) \ > + ((BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ > + __is_constexpr(b), (b) >= BITS_PER_TYPE(type), 0)))) > #else > /* > * BUILD_BUG_ON_ZERO is not available in h files included from asm files, > * disable the input check if that is the case. > */ > #define GENMASK_INPUT_CHECK(h, l) 0 > +#define BIT_INPUT_CHECK(type, b) 0 > #endif > #define __GENMASK(t, h, l) \ > @@ -44,4 +48,9 @@ > #define GENMASK_U32(h, l) __GENMASK(u32, h, l) > #define GENMASK_U64(h, l) __GENMASK(u64, h, l) > +#define BIT_U8(b) (u8)(BIT_INPUT_CHECK(u8, b) + BIT(b)) > +#define BIT_U16(b) (u16)(BIT_INPUT_CHECK(u16, b) + BIT(b)) > +#define BIT_U32(b) (u32)(BIT_INPUT_CHECK(u32, b) + BIT(b)) > +#define BIT_U64(b) (u64)(BIT_INPUT_CHECK(u64, b) + BIT(b)) Can you add some vertical spacing here, like between GENMASK and BIT blocks? > + > #endif /* __LINUX_BITS_H */ > > > > > In the worst case, you can just implement the macro you need in the > > uapi header, and make it working that way. > > > > Can you confirm that my proposal increases the kernel size? If so, is > > there any way to fix it? If it doesn't, I'd prefer to use the > > __GENMASK() approach. > > I agree on continuing with your approach. The bloat-o-meter indeed > showed almost no difference. `size ....i915.o` on the other hand > increased, but then decreased when I replaced our current REG_GENMASK() > implementation to reuse the new GENMASK_U*() > > $ # test-genmask.00: before any change > $ # test-genmask.01: after your patch to GENMASK > $ # test-genmask.01: after converting drivers/gpu/drm/i915/i915_reg_defs.h > to use the new macros > $ size build64/drivers/gpu/drm/i915/i915.o-test-genmask.* > text data bss dec hex filename > 4506628 215083 7168 4728879 48282f build64/drivers/gpu/drm/i915/i915.o-test-genmask.00 > 4511084 215083 7168 4733335 483997 build64/drivers/gpu/drm/i915/i915.o-test-genmask.01 > 4493292 215083 7168 4715543 47f417 build64/drivers/gpu/drm/i915/i915.o-test-genmask.02 > > $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[01] > add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-5 (-1) > Function old new delta > intel_drrs_activate 399 402 +3 > intel_psr_invalidate 546 547 +1 > intel_psr_flush 880 875 -5 > Total: Before=2980530, After=2980529, chg -0.00% > > $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[12] > add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) > Function old new delta > Total OK then. With the above approach, fixed-type BIT() macros look like wrappers around the plain BIT(), and I think, we can live with that. Can you send all the material as a proper series, including my GENMASK patch, your patch above and a patch that switches your driver to using the new API? I'll take it then in bitmap-for-next when the merge window will get closed. Thanks, Yury
On Thu, Jan 18, 2024 at 06:01:58PM -0800, Yury Norov wrote: >On Thu, Jan 18, 2024 at 05:25:00PM -0600, Lucas De Marchi wrote: >> SA2PR11MB4874 >> X-OriginatorOrg: intel.com >> Status: RO >> Content-Length: 6257 >> Lines: 150 >> >> On Thu, Jan 18, 2024 at 01:48:43PM -0800, Yury Norov wrote: >> > On Thu, Jan 18, 2024 at 02:42:12PM -0600, Lucas De Marchi wrote: >> > > Hi, >> > > >> > > Reviving this thread as now with xe driver merged we have 2 users for >> > > a fixed-width BIT/GENMASK. >> > >> > Can you point where and why? >> >> See users of REG_GENMASK and REG_BIT in drivers/gpu/drm/i915 and >> drivers/gpu/drm/xe. I think the register definition in the xe shows it >> in a good way: >> >> drivers/gpu/drm/xe/regs/xe_gt_regs.h >> >> The GPU registers are mostly 32-bit wide. We don't want to accidently do >> something like below (s/30/33/ added for illustration purposes): >> >> #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) >> #define DISABLE_D8_D16_COASLESCE REG_BIT(33) >> >> Same thing for GENMASK family of macros and for registers that are 16 or >> 8 bits. See e.g. drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h >> >> >> > >> > > On Wed, Jun 21, 2023 at 07:20:59PM -0700, Yury Norov wrote: >> > > > Hi Lucas, all! >> > > > >> > > > (Thanks, Andy, for pointing to this thread.) >> > > > >> > > > On Mon, May 08, 2023 at 10:14:02PM -0700, Lucas De Marchi wrote: >> > > > > Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create >> > > > > masks for fixed-width types and also the corresponding BIT_U32(), >> > > > > BIT_U16() and BIT_U8(). >> > > > >> > > > Can you split BIT() and GENMASK() material to separate patches? >> > > > >> > > > > All of those depend on a new "U" suffix added to the integer constant. >> > > > > Due to naming clashes it's better to call the macro U32. Since C doesn't >> > > > > have a proper suffix for short and char types, the U16 and U18 variants >> > > > > just use U32 with one additional check in the BIT_* macros to make >> > > > > sure the compiler gives an error when the those types overflow. >> > > > >> > > > I feel like I don't understand the sentence... >> > > > >> > > > > The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), >> > > > > as otherwise they would allow an invalid bit to be passed. Hence >> > > > > implement them in include/linux/bits.h rather than together with >> > > > > the other BIT* variants. >> > > > >> > > > I don't think it's a good way to go because BIT() belongs to a more basic >> > > > level than GENMASK(). Not mentioning possible header dependency issues. >> > > > If you need to test against tighter numeric region, I'd suggest to >> > > > do the same trick as GENMASK_INPUT_CHECK() does, but in uapi/linux/const.h >> > > > directly. Something like: >> > > > #define _U8(x) (CONST_GT(U8_MAX, x) + _AC(x, U)) >> > > >> > > but then make uapi/linux/const.h include linux/build_bug.h? >> > > I was thinking about leaving BIT() define where it is, and add the >> > > fixed-width versions in this header. I was thinking uapi/linux/const.h >> > > was more about allowing the U/ULL suffixes for things shared with asm. >> > >> > You can't include kernel headers in uapi code. But you can try doing >> > vice-versa: implement or move the pieces you need to share to the >> > uapi/linux/const.h, and use them in the kernel code. >> >> but in this CONST_GE() should trigger a BUG/static_assert >> on U8_MAX < x. AFAICS that check can't be on the uapi/ side, >> so there's nothing much left to change in uapi/linux/const.h. >> >> I'd expect drivers to be the primary user of these fixed-width BIT >> variants, hence the proposal to do in include/linux/bits.h. >> Ssomething like this WIP/untested diff (on top of your previous patch): >> >> >> diff --git a/include/linux/bits.h b/include/linux/bits.h >> index cb94128171b2..409cd10f7597 100644 >> --- a/include/linux/bits.h >> +++ b/include/linux/bits.h >> @@ -24,12 +24,16 @@ >> #define GENMASK_INPUT_CHECK(h, l) \ >> (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ >> __is_constexpr((l) > (h)), (l) > (h), 0))) >> +#define BIT_INPUT_CHECK(type, b) \ >> + ((BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ >> + __is_constexpr(b), (b) >= BITS_PER_TYPE(type), 0)))) >> #else >> /* >> * BUILD_BUG_ON_ZERO is not available in h files included from asm files, >> * disable the input check if that is the case. >> */ >> #define GENMASK_INPUT_CHECK(h, l) 0 >> +#define BIT_INPUT_CHECK(type, b) 0 >> #endif >> #define __GENMASK(t, h, l) \ >> @@ -44,4 +48,9 @@ >> #define GENMASK_U32(h, l) __GENMASK(u32, h, l) >> #define GENMASK_U64(h, l) __GENMASK(u64, h, l) >> +#define BIT_U8(b) (u8)(BIT_INPUT_CHECK(u8, b) + BIT(b)) >> +#define BIT_U16(b) (u16)(BIT_INPUT_CHECK(u16, b) + BIT(b)) >> +#define BIT_U32(b) (u32)(BIT_INPUT_CHECK(u32, b) + BIT(b)) >> +#define BIT_U64(b) (u64)(BIT_INPUT_CHECK(u64, b) + BIT(b)) > >Can you add some vertical spacing here, like between GENMASK and BIT >blocks? I think gmail mangled this, because it does show up with more vertical space on the email I sent: https://lore.kernel.org/all/clamvpymzwiehjqd6jhuigymyg5ikxewxyeee2eae4tgzmaz7u@6rposizee3t6/ Anyway, I will clean this up and probably add some docs about its usage. > >> + >> #endif /* __LINUX_BITS_H */ >> >> > >> > In the worst case, you can just implement the macro you need in the >> > uapi header, and make it working that way. >> > >> > Can you confirm that my proposal increases the kernel size? If so, is >> > there any way to fix it? If it doesn't, I'd prefer to use the >> > __GENMASK() approach. >> >> I agree on continuing with your approach. The bloat-o-meter indeed >> showed almost no difference. `size ....i915.o` on the other hand >> increased, but then decreased when I replaced our current REG_GENMASK() >> implementation to reuse the new GENMASK_U*() >> >> $ # test-genmask.00: before any change >> $ # test-genmask.01: after your patch to GENMASK >> $ # test-genmask.01: after converting drivers/gpu/drm/i915/i915_reg_defs.h >> to use the new macros >> $ size build64/drivers/gpu/drm/i915/i915.o-test-genmask.* >> text data bss dec hex filename >> 4506628 215083 7168 4728879 48282f build64/drivers/gpu/drm/i915/i915.o-test-genmask.00 >> 4511084 215083 7168 4733335 483997 build64/drivers/gpu/drm/i915/i915.o-test-genmask.01 >> 4493292 215083 7168 4715543 47f417 build64/drivers/gpu/drm/i915/i915.o-test-genmask.02 >> >> $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[01] >> add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-5 (-1) >> Function old new delta >> intel_drrs_activate 399 402 +3 >> intel_psr_invalidate 546 547 +1 >> intel_psr_flush 880 875 -5 >> Total: Before=2980530, After=2980529, chg -0.00% >> >> $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/i915/i915.o-test-genmask.0[12] >> add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0) >> Function old new delta >> Total > >OK then. With the above approach, fixed-type BIT() macros look like wrappers >around the plain BIT(), and I think, we can live with that. > >Can you send all the material as a proper series, including my >GENMASK patch, your patch above and a patch that switches your driver >to using the new API? I'll take it then in bitmap-for-next when the >merge window will get closed. sure, thanks Lucas De Marchi > >Thanks, >Yury
diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe..ff4786c99b8c 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -42,4 +42,26 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#define __GENMASK_U32(h, l) \ + (((~U32(0)) - (U32(1) << (l)) + 1) & \ + (~U32(0) >> (32 - 1 - (h)))) +#define GENMASK_U32(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U32(h, l)) + +#define __GENMASK_U16(h, l) \ + ((U32(0xffff) - (U32(1) << (l)) + 1) & \ + (U32(0xffff) >> (16 - 1 - (h)))) +#define GENMASK_U16(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U16(h, l)) + +#define __GENMASK_U8(h, l) \ + (((U32(0xff)) - (U32(1) << (l)) + 1) & \ + (U32(0xff) >> (8 - 1 - (h)))) +#define GENMASK_U8(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U8(h, l)) + +#define BIT_U32(nr) _BITU32(nr) +#define BIT_U16(nr) (GENMASK_INPUT_CHECK(16 - 1, nr) + (U32(1) << (nr))) +#define BIT_U8(nr) (GENMASK_INPUT_CHECK(32 - 1, nr) + (U32(1) << (nr))) + #endif /* __LINUX_BITS_H */ diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca5..3a4e152520f4 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -22,9 +22,11 @@ #define _AT(T,X) ((T)(X)) #endif +#define _U32(x) (_AC(x, U)) #define _UL(x) (_AC(x, UL)) #define _ULL(x) (_AC(x, ULL)) +#define _BITU32(x) (_U32(1) << (x)) #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) diff --git a/include/vdso/const.h b/include/vdso/const.h index 94b385ad438d..417384a9795b 100644 --- a/include/vdso/const.h +++ b/include/vdso/const.h @@ -4,6 +4,7 @@ #include <uapi/linux/const.h> +#define U32(x) (_U32(x)) #define UL(x) (_UL(x)) #define ULL(x) (_ULL(x))
Add GENMASK_U32(), GENMASK_U16() and GENMASK_U8() macros to create masks for fixed-width types and also the corresponding BIT_U32(), BIT_U16() and BIT_U8(). All of those depend on a new "U" suffix added to the integer constant. Due to naming clashes it's better to call the macro U32. Since C doesn't have a proper suffix for short and char types, the U16 and U18 variants just use U32 with one additional check in the BIT_* macros to make sure the compiler gives an error when the those types overflow. The BIT_U16() and BIT_U8() need the help of GENMASK_INPUT_CHECK(), as otherwise they would allow an invalid bit to be passed. Hence implement them in include/linux/bits.h rather than together with the other BIT* variants. The following test file is is used to test this: $ cat mask.c #include <linux/types.h> #include <linux/bits.h> static const u32 a = GENMASK_U32(31, 0); static const u16 b = GENMASK_U16(15, 0); static const u8 c = GENMASK_U8(7, 0); static const u32 x = BIT_U32(31); static const u16 y = BIT_U16(15); static const u8 z = BIT_U8(7); #if FAIL static const u32 a2 = GENMASK_U32(32, 0); static const u16 b2 = GENMASK_U16(16, 0); static const u8 c2 = GENMASK_U8(8, 0); static const u32 x2 = BIT_U32(32); static const u16 y2 = BIT_U16(16); static const u8 z2 = BIT_U8(8); #endif Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> --- include/linux/bits.h | 22 ++++++++++++++++++++++ include/uapi/linux/const.h | 2 ++ include/vdso/const.h | 1 + 3 files changed, 25 insertions(+)