Message ID | 20250115084228.107573-5-tianruidong@linux.alibaba.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ARM Error Source Table V2 Support | expand |
Hi Ruidong, kernel test robot noticed the following build errors: [auto build test ERROR on rafael-pm/linux-next] [also build test ERROR on rafael-pm/bleeding-edge arm64/for-next/core ras/edac-for-next linus/master tip/smp/core v6.13-rc7 next-20250116] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-RAS-AEST-Initial-AEST-driver/20250115-164601 base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next patch link: https://lore.kernel.org/r/20250115084228.107573-5-tianruidong%40linux.alibaba.com patch subject: [PATCH v3 4/5] RAS/ATL: Unified ATL interface for ARM64 and AMD config: x86_64-randconfig-074-20250117 (https://download.01.org/0day-ci/archive/20250117/202501171437.tCtg3x1c-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250117/202501171437.tCtg3x1c-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202501171437.tCtg3x1c-lkp@intel.com/ All errors (new ones prefixed by >>): drivers/ras/amd/fmpm.c: In function 'save_spa': >> drivers/ras/amd/fmpm.c:329:15: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Werror=implicit-function-declaration] 329 | spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | convert_umc_mca_addr_to_sys_addr cc1: some warnings being treated as errors -- drivers/ras/amd/atl/umc.c: In function 'retire_row_mi300': >> drivers/ras/amd/atl/umc.c:333:24: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Werror=implicit-function-declaration] 333 | addr = amd_convert_umc_mca_addr_to_sys_addr(a_err); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | convert_umc_mca_addr_to_sys_addr cc1: some warnings being treated as errors vim +329 drivers/ras/amd/fmpm.c 6f15e617cc9932 Yazen Ghannam 2024-02-13 291 838850c50884cd Yazen Ghannam 2024-03-01 292 static void save_spa(struct fru_rec *rec, unsigned int entry, 838850c50884cd Yazen Ghannam 2024-03-01 293 u64 addr, u64 id, unsigned int cpu) 838850c50884cd Yazen Ghannam 2024-03-01 294 { 838850c50884cd Yazen Ghannam 2024-03-01 295 unsigned int i, fru_idx, spa_entry; 838850c50884cd Yazen Ghannam 2024-03-01 296 struct atl_err a_err; 838850c50884cd Yazen Ghannam 2024-03-01 297 unsigned long spa; 838850c50884cd Yazen Ghannam 2024-03-01 298 838850c50884cd Yazen Ghannam 2024-03-01 299 if (entry >= max_nr_entries) { 838850c50884cd Yazen Ghannam 2024-03-01 300 pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n", 838850c50884cd Yazen Ghannam 2024-03-01 301 entry, max_nr_entries); 838850c50884cd Yazen Ghannam 2024-03-01 302 return; 838850c50884cd Yazen Ghannam 2024-03-01 303 } 838850c50884cd Yazen Ghannam 2024-03-01 304 838850c50884cd Yazen Ghannam 2024-03-01 305 /* spa_nr_entries is always multiple of max_nr_entries */ 838850c50884cd Yazen Ghannam 2024-03-01 306 for (i = 0; i < spa_nr_entries; i += max_nr_entries) { 838850c50884cd Yazen Ghannam 2024-03-01 307 fru_idx = i / max_nr_entries; 838850c50884cd Yazen Ghannam 2024-03-01 308 if (fru_records[fru_idx] == rec) 838850c50884cd Yazen Ghannam 2024-03-01 309 break; 838850c50884cd Yazen Ghannam 2024-03-01 310 } 838850c50884cd Yazen Ghannam 2024-03-01 311 838850c50884cd Yazen Ghannam 2024-03-01 312 if (i >= spa_nr_entries) { 838850c50884cd Yazen Ghannam 2024-03-01 313 pr_warn_once("FRU record %d not found\n", i); 838850c50884cd Yazen Ghannam 2024-03-01 314 return; 838850c50884cd Yazen Ghannam 2024-03-01 315 } 838850c50884cd Yazen Ghannam 2024-03-01 316 838850c50884cd Yazen Ghannam 2024-03-01 317 spa_entry = i + entry; 838850c50884cd Yazen Ghannam 2024-03-01 318 if (spa_entry >= spa_nr_entries) { 838850c50884cd Yazen Ghannam 2024-03-01 319 pr_warn_once("spa_entries[] index out-of-bounds\n"); 838850c50884cd Yazen Ghannam 2024-03-01 320 return; 838850c50884cd Yazen Ghannam 2024-03-01 321 } 838850c50884cd Yazen Ghannam 2024-03-01 322 838850c50884cd Yazen Ghannam 2024-03-01 323 memset(&a_err, 0, sizeof(struct atl_err)); 838850c50884cd Yazen Ghannam 2024-03-01 324 838850c50884cd Yazen Ghannam 2024-03-01 325 a_err.addr = addr; 838850c50884cd Yazen Ghannam 2024-03-01 326 a_err.ipid = id; 838850c50884cd Yazen Ghannam 2024-03-01 327 a_err.cpu = cpu; 838850c50884cd Yazen Ghannam 2024-03-01 328 838850c50884cd Yazen Ghannam 2024-03-01 @329 spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err); 838850c50884cd Yazen Ghannam 2024-03-01 330 if (IS_ERR_VALUE(spa)) { 838850c50884cd Yazen Ghannam 2024-03-01 331 pr_debug("Failed to get system address\n"); 838850c50884cd Yazen Ghannam 2024-03-01 332 return; 838850c50884cd Yazen Ghannam 2024-03-01 333 } 838850c50884cd Yazen Ghannam 2024-03-01 334 838850c50884cd Yazen Ghannam 2024-03-01 335 spa_entries[spa_entry] = spa; 838850c50884cd Yazen Ghannam 2024-03-01 336 pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n", 838850c50884cd Yazen Ghannam 2024-03-01 337 fru_idx, entry, spa_entry, spa_entries[spa_entry]); 838850c50884cd Yazen Ghannam 2024-03-01 338 } 838850c50884cd Yazen Ghannam 2024-03-01 339
On Wed, Jan 15, 2025 at 04:42:27PM +0800, Ruidong Tian wrote: > Subject: Re: [PATCH v3 4/5] RAS/ATL: Unified ATL interface for ARM64 and AMD The condensed patch description in the subject line should start with a uppercase letter and should be written in imperative tone. > Translate device normalize address in AMD, also named logical address, > to system physical address is a common interface in RAS. Provides common > interface both for AMD and ARM. This needs a lot more explanation. > Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com> > --- > drivers/edac/amd64_edac.c | 2 +- > drivers/ras/aest/aest-core.c | 12 ++++++------ > drivers/ras/amd/atl/core.c | 4 ++-- > drivers/ras/amd/atl/internal.h | 2 +- > drivers/ras/amd/atl/umc.c | 3 ++- > drivers/ras/ras.c | 24 +++++++++++------------- > include/linux/ras.h | 9 ++++----- > 7 files changed, 27 insertions(+), 29 deletions(-) > > diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c > index ddfbdb66b794..1e9c96e4daa8 100644 > --- a/drivers/edac/amd64_edac.c > +++ b/drivers/edac/amd64_edac.c > @@ -2832,7 +2832,7 @@ static void decode_umc_error(int node_id, struct mce *m) > a_err.ipid = m->ipid; > a_err.cpu = m->extcpu; > > - sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); > + sys_addr = convert_ras_la_to_spa(&a_err); No, this is not how all this is done. You don't rename functions and make them generic - you *extract* generic functionality into generic functions and have other functions which use them, call them. And you do that when there are users, not before. Ok, that should be enough feedback for now. Thx.
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ddfbdb66b794..1e9c96e4daa8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2832,7 +2832,7 @@ static void decode_umc_error(int node_id, struct mce *m) a_err.ipid = m->ipid; a_err.cpu = m->extcpu; - sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + sys_addr = convert_ras_la_to_spa(&a_err); if (IS_ERR_VALUE(sys_addr)) { err.err_code = ERR_NORM_ADDR; goto log_error; diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c index 12d0a32ecda9..0530880ded3e 100644 --- a/drivers/ras/aest/aest-core.c +++ b/drivers/ras/aest/aest-core.c @@ -228,16 +228,16 @@ static void aest_node_pool_process(struct work_struct *work) llist_for_each_entry(event, head, llnode) { aest_print(event); - /* TODO: translate Logical Addresses to System Physical Addresses */ + addr = event->regs.err_addr & (1UL << CONFIG_ARM64_PA_BITS); + if (event->addressing_mode == AEST_ADDREESS_LA || - (event->regs.err_addr & ERR_ADDR_AI)) { - pr_notice("Can not translate LA to SPA\n"); - addr = 0; - } else + (event->regs.err_addr & ERR_ADDR_AI)) + addr = convert_ras_la_to_spa(event); + else addr = event->regs.err_addr & (1UL << CONFIG_ARM64_PA_BITS); status = event->regs.err_status; - if (addr && ((status & ERR_STATUS_UE) || (status & ERR_STATUS_DE))) + if (addr > 0 && ((status & ERR_STATUS_UE) || (status & ERR_STATUS_DE))) aest_handle_memory_failure(addr); blocking_notifier_call_chain(&aest_decoder_chain, 0, event); diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c index 4197e10993ac..6b5f0f65bf8e 100644 --- a/drivers/ras/amd/atl/core.c +++ b/drivers/ras/amd/atl/core.c @@ -207,7 +207,7 @@ static int __init amd_atl_init(void) /* Increment this module's recount so that it can't be easily unloaded. */ __module_get(THIS_MODULE); - amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr); + atl_register_decoder(convert_umc_mca_addr_to_sys_addr); pr_info("AMD Address Translation Library initialized\n"); return 0; @@ -219,7 +219,7 @@ static int __init amd_atl_init(void) */ static void __exit amd_atl_exit(void) { - amd_atl_unregister_decoder(); + atl_unregister_decoder(); } module_init(amd_atl_init); diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 143d04c779a8..42686189decb 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -277,7 +277,7 @@ int denormalize_address(struct addr_ctx *ctx); int dehash_address(struct addr_ctx *ctx); unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); -unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +unsigned long convert_umc_mca_addr_to_sys_addr(void *data); u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index dc8aa12f63c8..aa13f7fd7ba4 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -395,8 +395,9 @@ static u8 get_coh_st_inst_id(struct atl_err *err) return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); } -unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +unsigned long convert_umc_mca_addr_to_sys_addr(void *data) { + struct atl_err *err = data; u8 socket_id = topology_physical_package_id(err->cpu); u8 coh_st_inst_id = get_coh_st_inst_id(err); unsigned long addr = get_addr(err->addr); diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index a6e4792a1b2e..e5f23a8279c2 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -10,36 +10,34 @@ #include <linux/ras.h> #include <linux/uuid.h> -#if IS_ENABLED(CONFIG_AMD_ATL) /* * Once set, this function pointer should never be unset. * * The library module will set this pointer if it successfully loads. The module * should not be unloaded except for testing and debug purposes. */ -static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err); +static unsigned long (*atl_ras_la_to_spa)(void *err); -void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)) +void atl_register_decoder(unsigned long (*f)(void *)) { - amd_atl_umc_na_to_spa = f; + atl_ras_la_to_spa = f; } -EXPORT_SYMBOL_GPL(amd_atl_register_decoder); +EXPORT_SYMBOL_GPL(atl_register_decoder); -void amd_atl_unregister_decoder(void) +void atl_unregister_decoder(void) { - amd_atl_umc_na_to_spa = NULL; + atl_ras_la_to_spa = NULL; } -EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder); +EXPORT_SYMBOL_GPL(atl_unregister_decoder); -unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +unsigned long convert_ras_la_to_spa(void *err) { - if (!amd_atl_umc_na_to_spa) + if (!atl_ras_la_to_spa) return -EINVAL; - return amd_atl_umc_na_to_spa(err); + return atl_ras_la_to_spa(err); } -EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr); -#endif /* CONFIG_AMD_ATL */ +EXPORT_SYMBOL_GPL(convert_ras_la_to_spa); #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras diff --git a/include/linux/ras.h b/include/linux/ras.h index 1c777af6a1af..2e90556779d2 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -43,16 +43,15 @@ struct atl_err { }; #if IS_ENABLED(CONFIG_AMD_ATL) -void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); -void amd_atl_unregister_decoder(void); void amd_retire_dram_row(struct atl_err *err); -unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); #else static inline void amd_retire_dram_row(struct atl_err *err) { } -static inline unsigned long -amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } #endif /* CONFIG_AMD_ATL */ +void atl_register_decoder(unsigned long (*f)(void *)); +void atl_unregister_decoder(void); +unsigned long convert_ras_la_to_spa(void *err); + #if IS_ENABLED(CONFIG_AEST) void aest_register_decode_chain(struct notifier_block *nb); void aest_unregister_decode_chain(struct notifier_block *nb);
Translate device normalize address in AMD, also named logical address, to system physical address is a common interface in RAS. Provides common interface both for AMD and ARM. Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com> --- drivers/edac/amd64_edac.c | 2 +- drivers/ras/aest/aest-core.c | 12 ++++++------ drivers/ras/amd/atl/core.c | 4 ++-- drivers/ras/amd/atl/internal.h | 2 +- drivers/ras/amd/atl/umc.c | 3 ++- drivers/ras/ras.c | 24 +++++++++++------------- include/linux/ras.h | 9 ++++----- 7 files changed, 27 insertions(+), 29 deletions(-)