diff mbox series

[v3,4/5] RAS/ATL: Unified ATL interface for ARM64 and AMD

Message ID 20250115084228.107573-5-tianruidong@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series ARM Error Source Table V2 Support | expand

Commit Message

Ruidong Tian Jan. 15, 2025, 8:42 a.m. UTC
Translate device normalize address in AMD, also named logical address,
to system physical address is a common interface in RAS. Provides common
interface both for AMD and ARM.

Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
---
 drivers/edac/amd64_edac.c      |  2 +-
 drivers/ras/aest/aest-core.c   | 12 ++++++------
 drivers/ras/amd/atl/core.c     |  4 ++--
 drivers/ras/amd/atl/internal.h |  2 +-
 drivers/ras/amd/atl/umc.c      |  3 ++-
 drivers/ras/ras.c              | 24 +++++++++++-------------
 include/linux/ras.h            |  9 ++++-----
 7 files changed, 27 insertions(+), 29 deletions(-)

Comments

kernel test robot Jan. 17, 2025, 6:14 a.m. UTC | #1
Hi Ruidong,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge arm64/for-next/core ras/edac-for-next linus/master tip/smp/core v6.13-rc7 next-20250116]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-RAS-AEST-Initial-AEST-driver/20250115-164601
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20250115084228.107573-5-tianruidong%40linux.alibaba.com
patch subject: [PATCH v3 4/5] RAS/ATL: Unified ATL interface for ARM64 and AMD
config: x86_64-randconfig-074-20250117 (https://download.01.org/0day-ci/archive/20250117/202501171437.tCtg3x1c-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250117/202501171437.tCtg3x1c-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501171437.tCtg3x1c-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/ras/amd/fmpm.c: In function 'save_spa':
>> drivers/ras/amd/fmpm.c:329:15: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Werror=implicit-function-declaration]
     329 |         spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
         |               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |               convert_umc_mca_addr_to_sys_addr
   cc1: some warnings being treated as errors
--
   drivers/ras/amd/atl/umc.c: In function 'retire_row_mi300':
>> drivers/ras/amd/atl/umc.c:333:24: error: implicit declaration of function 'amd_convert_umc_mca_addr_to_sys_addr'; did you mean 'convert_umc_mca_addr_to_sys_addr'? [-Werror=implicit-function-declaration]
     333 |                 addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
         |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                        convert_umc_mca_addr_to_sys_addr
   cc1: some warnings being treated as errors


vim +329 drivers/ras/amd/fmpm.c

6f15e617cc9932 Yazen Ghannam 2024-02-13  291  
838850c50884cd Yazen Ghannam 2024-03-01  292  static void save_spa(struct fru_rec *rec, unsigned int entry,
838850c50884cd Yazen Ghannam 2024-03-01  293  		     u64 addr, u64 id, unsigned int cpu)
838850c50884cd Yazen Ghannam 2024-03-01  294  {
838850c50884cd Yazen Ghannam 2024-03-01  295  	unsigned int i, fru_idx, spa_entry;
838850c50884cd Yazen Ghannam 2024-03-01  296  	struct atl_err a_err;
838850c50884cd Yazen Ghannam 2024-03-01  297  	unsigned long spa;
838850c50884cd Yazen Ghannam 2024-03-01  298  
838850c50884cd Yazen Ghannam 2024-03-01  299  	if (entry >= max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01  300  		pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
838850c50884cd Yazen Ghannam 2024-03-01  301  			     entry, max_nr_entries);
838850c50884cd Yazen Ghannam 2024-03-01  302  		return;
838850c50884cd Yazen Ghannam 2024-03-01  303  	}
838850c50884cd Yazen Ghannam 2024-03-01  304  
838850c50884cd Yazen Ghannam 2024-03-01  305  	/* spa_nr_entries is always multiple of max_nr_entries */
838850c50884cd Yazen Ghannam 2024-03-01  306  	for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01  307  		fru_idx = i / max_nr_entries;
838850c50884cd Yazen Ghannam 2024-03-01  308  		if (fru_records[fru_idx] == rec)
838850c50884cd Yazen Ghannam 2024-03-01  309  			break;
838850c50884cd Yazen Ghannam 2024-03-01  310  	}
838850c50884cd Yazen Ghannam 2024-03-01  311  
838850c50884cd Yazen Ghannam 2024-03-01  312  	if (i >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01  313  		pr_warn_once("FRU record %d not found\n", i);
838850c50884cd Yazen Ghannam 2024-03-01  314  		return;
838850c50884cd Yazen Ghannam 2024-03-01  315  	}
838850c50884cd Yazen Ghannam 2024-03-01  316  
838850c50884cd Yazen Ghannam 2024-03-01  317  	spa_entry = i + entry;
838850c50884cd Yazen Ghannam 2024-03-01  318  	if (spa_entry >= spa_nr_entries) {
838850c50884cd Yazen Ghannam 2024-03-01  319  		pr_warn_once("spa_entries[] index out-of-bounds\n");
838850c50884cd Yazen Ghannam 2024-03-01  320  		return;
838850c50884cd Yazen Ghannam 2024-03-01  321  	}
838850c50884cd Yazen Ghannam 2024-03-01  322  
838850c50884cd Yazen Ghannam 2024-03-01  323  	memset(&a_err, 0, sizeof(struct atl_err));
838850c50884cd Yazen Ghannam 2024-03-01  324  
838850c50884cd Yazen Ghannam 2024-03-01  325  	a_err.addr = addr;
838850c50884cd Yazen Ghannam 2024-03-01  326  	a_err.ipid = id;
838850c50884cd Yazen Ghannam 2024-03-01  327  	a_err.cpu  = cpu;
838850c50884cd Yazen Ghannam 2024-03-01  328  
838850c50884cd Yazen Ghannam 2024-03-01 @329  	spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
838850c50884cd Yazen Ghannam 2024-03-01  330  	if (IS_ERR_VALUE(spa)) {
838850c50884cd Yazen Ghannam 2024-03-01  331  		pr_debug("Failed to get system address\n");
838850c50884cd Yazen Ghannam 2024-03-01  332  		return;
838850c50884cd Yazen Ghannam 2024-03-01  333  	}
838850c50884cd Yazen Ghannam 2024-03-01  334  
838850c50884cd Yazen Ghannam 2024-03-01  335  	spa_entries[spa_entry] = spa;
838850c50884cd Yazen Ghannam 2024-03-01  336  	pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
838850c50884cd Yazen Ghannam 2024-03-01  337  		 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
838850c50884cd Yazen Ghannam 2024-03-01  338  }
838850c50884cd Yazen Ghannam 2024-03-01  339
Borislav Petkov Feb. 19, 2025, 9 p.m. UTC | #2
On Wed, Jan 15, 2025 at 04:42:27PM +0800, Ruidong Tian wrote:
> Subject: Re: [PATCH v3 4/5] RAS/ATL: Unified ATL interface for ARM64 and AMD

The condensed patch description in the subject line should start with a
uppercase letter and should be written in imperative tone.


> Translate device normalize address in AMD, also named logical address,
> to system physical address is a common interface in RAS. Provides common
> interface both for AMD and ARM.

This needs a lot more explanation.

> Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
> ---
>  drivers/edac/amd64_edac.c      |  2 +-
>  drivers/ras/aest/aest-core.c   | 12 ++++++------
>  drivers/ras/amd/atl/core.c     |  4 ++--
>  drivers/ras/amd/atl/internal.h |  2 +-
>  drivers/ras/amd/atl/umc.c      |  3 ++-
>  drivers/ras/ras.c              | 24 +++++++++++-------------
>  include/linux/ras.h            |  9 ++++-----
>  7 files changed, 27 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index ddfbdb66b794..1e9c96e4daa8 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -2832,7 +2832,7 @@ static void decode_umc_error(int node_id, struct mce *m)
>  	a_err.ipid = m->ipid;
>  	a_err.cpu  = m->extcpu;
>  
> -	sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
> +	sys_addr = convert_ras_la_to_spa(&a_err);

No, this is not how all this is done. You don't rename functions and make them
generic - you *extract* generic functionality into generic functions and have
other functions which use them, call them.

And you do that when there are users, not before.

Ok, that should be enough feedback for now.

Thx.
diff mbox series

Patch

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ddfbdb66b794..1e9c96e4daa8 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2832,7 +2832,7 @@  static void decode_umc_error(int node_id, struct mce *m)
 	a_err.ipid = m->ipid;
 	a_err.cpu  = m->extcpu;
 
-	sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
+	sys_addr = convert_ras_la_to_spa(&a_err);
 	if (IS_ERR_VALUE(sys_addr)) {
 		err.err_code = ERR_NORM_ADDR;
 		goto log_error;
diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c
index 12d0a32ecda9..0530880ded3e 100644
--- a/drivers/ras/aest/aest-core.c
+++ b/drivers/ras/aest/aest-core.c
@@ -228,16 +228,16 @@  static void aest_node_pool_process(struct work_struct *work)
 	llist_for_each_entry(event, head, llnode) {
 		aest_print(event);
 
-		/* TODO: translate Logical Addresses to System Physical Addresses */
+		addr = event->regs.err_addr & (1UL << CONFIG_ARM64_PA_BITS);
+
 		if (event->addressing_mode == AEST_ADDREESS_LA ||
-			(event->regs.err_addr & ERR_ADDR_AI)) {
-			pr_notice("Can not translate LA to SPA\n");
-			addr = 0;
-		} else
+			(event->regs.err_addr & ERR_ADDR_AI))
+			addr = convert_ras_la_to_spa(event);
+		else
 			addr = event->regs.err_addr & (1UL << CONFIG_ARM64_PA_BITS);
 
 		status = event->regs.err_status;
-		if (addr && ((status & ERR_STATUS_UE) || (status & ERR_STATUS_DE)))
+		if (addr > 0 && ((status & ERR_STATUS_UE) || (status & ERR_STATUS_DE)))
 			aest_handle_memory_failure(addr);
 
 		blocking_notifier_call_chain(&aest_decoder_chain, 0, event);
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 4197e10993ac..6b5f0f65bf8e 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -207,7 +207,7 @@  static int __init amd_atl_init(void)
 
 	/* Increment this module's recount so that it can't be easily unloaded. */
 	__module_get(THIS_MODULE);
-	amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
+	atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
 
 	pr_info("AMD Address Translation Library initialized\n");
 	return 0;
@@ -219,7 +219,7 @@  static int __init amd_atl_init(void)
  */
 static void __exit amd_atl_exit(void)
 {
-	amd_atl_unregister_decoder();
+	atl_unregister_decoder();
 }
 
 module_init(amd_atl_init);
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 143d04c779a8..42686189decb 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -277,7 +277,7 @@  int denormalize_address(struct addr_ctx *ctx);
 int dehash_address(struct addr_ctx *ctx);
 
 unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
-unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+unsigned long convert_umc_mca_addr_to_sys_addr(void *data);
 
 u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
 u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
index dc8aa12f63c8..aa13f7fd7ba4 100644
--- a/drivers/ras/amd/atl/umc.c
+++ b/drivers/ras/amd/atl/umc.c
@@ -395,8 +395,9 @@  static u8 get_coh_st_inst_id(struct atl_err *err)
 	return FIELD_GET(UMC_CHANNEL_NUM, err->ipid);
 }
 
-unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+unsigned long convert_umc_mca_addr_to_sys_addr(void *data)
 {
+	struct atl_err *err = data;
 	u8 socket_id = topology_physical_package_id(err->cpu);
 	u8 coh_st_inst_id = get_coh_st_inst_id(err);
 	unsigned long addr = get_addr(err->addr);
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index a6e4792a1b2e..e5f23a8279c2 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -10,36 +10,34 @@ 
 #include <linux/ras.h>
 #include <linux/uuid.h>
 
-#if IS_ENABLED(CONFIG_AMD_ATL)
 /*
  * Once set, this function pointer should never be unset.
  *
  * The library module will set this pointer if it successfully loads. The module
  * should not be unloaded except for testing and debug purposes.
  */
-static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
+static unsigned long (*atl_ras_la_to_spa)(void *err);
 
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
+void atl_register_decoder(unsigned long (*f)(void *))
 {
-	amd_atl_umc_na_to_spa = f;
+	atl_ras_la_to_spa = f;
 }
-EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
+EXPORT_SYMBOL_GPL(atl_register_decoder);
 
-void amd_atl_unregister_decoder(void)
+void atl_unregister_decoder(void)
 {
-	amd_atl_umc_na_to_spa = NULL;
+	atl_ras_la_to_spa = NULL;
 }
-EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
+EXPORT_SYMBOL_GPL(atl_unregister_decoder);
 
-unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
+unsigned long convert_ras_la_to_spa(void *err)
 {
-	if (!amd_atl_umc_na_to_spa)
+	if (!atl_ras_la_to_spa)
 		return -EINVAL;
 
-	return amd_atl_umc_na_to_spa(err);
+	return atl_ras_la_to_spa(err);
 }
-EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
-#endif /* CONFIG_AMD_ATL */
+EXPORT_SYMBOL_GPL(convert_ras_la_to_spa);
 
 #define CREATE_TRACE_POINTS
 #define TRACE_INCLUDE_PATH ../../include/ras
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1c777af6a1af..2e90556779d2 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -43,16 +43,15 @@  struct atl_err {
 };
 
 #if IS_ENABLED(CONFIG_AMD_ATL)
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
-void amd_atl_unregister_decoder(void);
 void amd_retire_dram_row(struct atl_err *err);
-unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
 #else
 static inline void amd_retire_dram_row(struct atl_err *err) { }
-static inline unsigned long
-amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
 #endif /* CONFIG_AMD_ATL */
 
+void atl_register_decoder(unsigned long (*f)(void *));
+void atl_unregister_decoder(void);
+unsigned long convert_ras_la_to_spa(void *err);
+
 #if IS_ENABLED(CONFIG_AEST)
 void aest_register_decode_chain(struct notifier_block *nb);
 void aest_unregister_decode_chain(struct notifier_block *nb);