Message ID | 20210823185437.94417-4-nchatrad@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | x86/edac/amd64: Add support for noncpu nodes | expand |
On Tue, Aug 24, 2021 at 12:24:37AM +0530, Naveen Krishna Chatradhi wrote: > On newer heterogeneous systems the data fabrics of the CPUs and GPUs > are connected directly via a custom links. > > This patch modifies the amd64_edac module to handle the HBM memory > enumeration leveraging the existing edac and the amd64 specific data > structures. > > Define PCI IDs and ops for Aldeberarn GPUs in family_types array. > The UMC Phys on GPU nodes are enumerated as csrows and the UMC channels > connected to HBMs are enumerated as ranks. > Define a function to find the UMCv2 channel number. > Define a function to calculate base address of the UMCv2 registers. > ECC is enabled by default on HBM's. > Adds debug information for UMCv2 channel registers. Avoid having "This patch" or "This commit" in the commit message. It is tautologically useless. Also, do $ git grep 'This patch' Documentation/process for more details. What is more, a commit message should not explain *what* the patch is doing - that should be obvious from the diff itself. Rather, it should concentrate more on the *why* it is doing it. > Signed-off-by: Muralidhara M K <muralimk@amd.com> > Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com> This SOB chain is wrong. It suggests Muralidhara is the author but his From: like in patch 1 is missing here. > Cc: Yazen Ghannam <yazen.ghannam@amd.com> > --- > Changes since v2: > 1. Restored line deletions and handled minor comments > 2. Modified commit message and some of the function comments > 3. variable df_inst_id is introduced instead of umc_num > > drivers/edac/amd64_edac.c | 219 +++++++++++++++++++++++++++++++++----- > drivers/edac/amd64_edac.h | 28 +++++ > 2 files changed, 222 insertions(+), 25 deletions(-) ... > @@ -1097,6 +1103,15 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) > > edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); > > + if (pvt->is_noncpu) { > + cs_mode = f17_get_cs_mode(cs0, ctrl, pvt); > + for_each_chip_select(cs0, ctrl, pvt) { > + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); > + amd64_info(EDAC_MC ": %d: %5dMB\n", cs0, size0); > + } > + return; > + } No, define a separate debug_display_dimm_sizes_gpu() and put all the GPU-specific dumping in there instead of sprinkling them around the code like that. > + > for (dimm = 0; dimm < 2; dimm++) { > cs0 = dimm * 2; > cs1 = dimm * 2 + 1; > @@ -1121,10 +1136,15 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) Ditto: __dump_misc_regs_gpu() > umc_base = get_umc_base(i); > umc = &pvt->umc[i]; > > - edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); > + if (!pvt->is_noncpu) > + edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); > edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); > edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); > edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); > + if (pvt->is_noncpu) { > + edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i); > + goto dimm_size; > + } > > amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp); > edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp); ... > +static void read_noncpu_umc_base_mask(struct amd64_pvt *pvt) > +{ > + u32 base_reg, mask_reg; > + u32 *base, *mask; > + int umc, cs; > + > + for_each_umc(umc) { > + for_each_chip_select(cs, umc, pvt) { > + base_reg = get_noncpu_umc_base(umc, cs) + UMCCH_BASE_ADDR; > + base = &pvt->csels[umc].csbases[cs]; > + > + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) { > + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", > + umc, cs, *base, base_reg); > + } > + > + mask_reg = get_noncpu_umc_base(umc, cs) + UMCCH_ADDR_MASK; > + mask = &pvt->csels[umc].csmasks[cs]; > + > + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) { > + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", > + umc, cs, *mask, mask_reg); > + } > + } > + } > +} This code pretty-much duplicates what read_umc_base_mask() does - pls add a common helper which is used by both CPU and GPU. > + > static void read_umc_base_mask(struct amd64_pvt *pvt) > { > u32 umc_base_reg, umc_base_reg_sec; > @@ -1288,8 +1341,12 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) > > prep_chip_selects(pvt); > > - if (pvt->umc) > - return read_umc_base_mask(pvt); > + if (pvt->umc) { > + if (pvt->is_noncpu) > + return read_noncpu_umc_base_mask(pvt); > + else > + return read_umc_base_mask(pvt); > + } > > for_each_chip_select(cs, 0, pvt) { > int reg0 = DCSB0 + (cs * 4); > @@ -1335,6 +1392,11 @@ static void determine_memory_type(struct amd64_pvt *pvt) > u32 dram_ctrl, dcsm; > > if (pvt->umc) { > + if (pvt->is_noncpu) { > + pvt->dram_type = MEM_HBM2; > + return; > + } I don't like this sprinkling of "if (pvt->is_noncpu)" everywhere, at all. Please define a separate read_mc_regs_df() or so which contains only the needed functionalty which you can carve out from read_mc_regs(). > + > if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) > pvt->dram_type = MEM_LRDDR4; > else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) > @@ -1724,7 +1786,10 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) > > /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ > for_each_umc(i) > - channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); > + if (pvt->is_noncpu) > + channels += pvt->csels[i].b_cnt; > + else > + channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); > > amd64_info("MCT channel count: %d\n", channels); > No, a separate gpu_early_channel_count() is needed here. There's a reason for those function pointers getting assigned depending on family. > @@ -1865,6 +1930,12 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, > u32 msb, weight, num_zero_bits; > int dimm, size = 0; > > + if (pvt->is_noncpu) { > + addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; > + /* The memory channels in case of GPUs are fully populated */ > + goto skip_noncpu; > + } > + Ditto. > /* No Chip Selects are enabled. */ > if (!cs_mode) > return size; > @@ -1890,6 +1961,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, > else > addr_mask_orig = pvt->csels[umc].csmasks[dimm]; > > + skip_noncpu: > /* > * The number of zero bits in the mask is equal to the number of bits > * in a full mask minus the number of bits in the current mask. > @@ -2635,6 +2707,16 @@ static struct amd64_family_type family_types[] = { > .dbam_to_cs = f17_addr_mask_to_cs_size, > } > }, > + [ALDEBARAN_GPUS] = { > + .ctl_name = "ALDEBARAN", > + .f0_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0, > + .f6_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6, > + .max_mcs = 4, > + .ops = { > + .early_channel_count = f17_early_channel_count, > + .dbam_to_cs = f17_addr_mask_to_cs_size, > + } > + }, Here you define those GPU-specific function pointers which you then call. > @@ -2890,6 +2972,30 @@ static int find_umc_channel(struct mce *m) > return (m->ipid & GENMASK(31, 0)) >> 20; > } > > +/* > + * The CPUs have one channel per UMC, So a UMC number is equivalent to a > + * channel number. The NONCPUs have 8 channels per UMC, so the UMC number no > + * longer works as a channel number. > + * The channel number within a NONCPU UMC is given in MCA_IPID[15:12]. > + * However, the IDs are split such that two UMC values go to one UMC, and > + * the channel numbers are split in two groups of four. > + * > + * Refer comment on get_noncpu_umc_base() from amd64_edac.h > + * > + * For example, > + * UMC0 CH[3:0] = 0x0005[3:0]000 > + * UMC0 CH[7:4] = 0x0015[3:0]000 > + * UMC1 CH[3:0] = 0x0025[3:0]000 > + * UMC1 CH[7:4] = 0x0035[3:0]000 > + */ > +static int find_umc_channel_noncpu(struct mce *m) > +{ > + u8 umc = find_umc_channel(m); > + u8 ch = ((m->ipid >> 12) & 0xf); > + > + return umc % 2 ? (ch + 4) : ch; > +} > + > static void decode_umc_error(int node_id, struct mce *m) > { > u8 ecc_type = (m->status >> 45) & 0x3; > @@ -2897,6 +3003,7 @@ static void decode_umc_error(int node_id, struct mce *m) > struct amd64_pvt *pvt; > struct err_info err; > u64 sys_addr; > + u8 df_inst_id; You don't need that variable and can work with err.channel just fine. > mci = edac_mc_find(node_id); > if (!mci) > @@ -2909,7 +3016,22 @@ static void decode_umc_error(int node_id, struct mce *m) > if (m->status & MCI_STATUS_DEFERRED) > ecc_type = 3; > > - err.channel = find_umc_channel(m); > + if (pvt->is_noncpu) { > + /* > + * The NONCPUs have one Chip Select per UMC, so the UMC number > + * can used as the Chip Select number. However, the UMC number > + * is split in the ID value so it's necessary to divide by 2. > + */ > + err.csrow = find_umc_channel(m) / 2; > + err.channel = find_umc_channel_noncpu(m); > + /* On NONCPUs, instance id is calculated as below. */ > + df_inst_id = err.csrow * 8 + err.channel; err.channel += err.csrow * 8; tadaaa! > + } else { > + /* On CPUs, "Channel"="UMC Number"="DF Instance ID". */ > + err.channel = find_umc_channel(m); > + err.csrow = m->synd & 0x7; > + df_inst_id = err.channel; > + } > > if (!(m->status & MCI_STATUS_SYNDV)) { > err.err_code = ERR_SYND; > @@ -2925,9 +3047,7 @@ static void decode_umc_error(int node_id, struct mce *m) > err.err_code = ERR_CHANNEL; > } > > - err.csrow = m->synd & 0x7; > - > - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { > + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, df_inst_id, &sys_addr)) { > err.err_code = ERR_NORM_ADDR; > goto log_error; > } > @@ -3054,15 +3174,21 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) > > /* Read registers from each UMC */ > for_each_umc(i) { > + if (pvt->is_noncpu) > + umc_base = get_noncpu_umc_base(i, 0); > + else > + umc_base = get_umc_base(i); > > - umc_base = get_umc_base(i); > umc = &pvt->umc[i]; > > - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); > amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); > amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); > amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); > - amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); > + > + if (!pvt->is_noncpu) { > + amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); > + amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); > + } > } > } > > @@ -3144,7 +3270,9 @@ static void read_mc_regs(struct amd64_pvt *pvt) > determine_memory_type(pvt); > edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); > > - determine_ecc_sym_sz(pvt); > + /* ECC symbol size is not available on NONCPU nodes */ > + if (!pvt->is_noncpu) > + determine_ecc_sym_sz(pvt); > } > > /* > @@ -3232,15 +3360,21 @@ static int init_csrows_df(struct mem_ctl_info *mci) No, separate function: init_csrows_gpu() > continue; > > empty = 0; > - dimm = mci->csrows[cs]->channels[umc]->dimm; > + if (pvt->is_noncpu) { > + dimm = mci->csrows[umc]->channels[cs]->dimm; > + dimm->edac_mode = EDAC_SECDED; > + dimm->dtype = DEV_X16; > + } else { > + dimm = mci->csrows[cs]->channels[umc]->dimm; > + dimm->edac_mode = edac_mode; > + dimm->dtype = dev_type; > + } > > edac_dbg(1, "MC node: %d, csrow: %d\n", > pvt->mc_node_id, cs); > > dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); > dimm->mtype = pvt->dram_type; > - dimm->edac_mode = edac_mode; > - dimm->dtype = dev_type; > dimm->grain = 64; > } > } > @@ -3505,7 +3639,9 @@ static bool ecc_enabled(struct amd64_pvt *pvt) > > umc_en_mask |= BIT(i); > > - if (umc->umc_cap_hi & UMC_ECC_ENABLED) > + /* ECC is enabled by default on NONCPU nodes */ > + if (pvt->is_noncpu || > + (umc->umc_cap_hi & UMC_ECC_ENABLED)) > ecc_en_mask |= BIT(i); Separate function pls. I guess you get the idea - you simply define a separate function for the family you're adding support for instead of sprinkling if (bla) everywhere. If functionality is duplicated, you define a common helper. Feel free to ask if something's unclear. ... > @@ -3804,6 +3963,9 @@ static int probe_one_instance(unsigned int nid) > struct ecc_settings *s; > int ret; > > + if (!F3) > + return 0; > + > ret = -ENOMEM; > s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); > if (!s) > @@ -3815,6 +3977,9 @@ static int probe_one_instance(unsigned int nid) > if (!pvt) > goto err_settings; > > + if (nid >= NONCPU_NODE_INDEX) > + pvt->is_noncpu = true; This is silly and error-prone. Proper detection should happen in per_family_init() and there you should read out from the hardware whether this is a GPU or a CPU node. Then, you should put an enum type in amd64_family_type which has { FAM_TYPE_CPU, FAM_TYPE_GPU, ... } etc and the places where you need to check whether it is CPU or a GPU, test those types. > + > pvt->mc_node_id = nid; > pvt->F3 = F3; > > @@ -3888,6 +4053,10 @@ static void remove_one_instance(unsigned int nid) > struct mem_ctl_info *mci; > struct amd64_pvt *pvt; > > + /* Nothing to remove for the space holder entries */ > + if (!F3) > + return; > + > /* Remove from EDAC CORE tracking list */ > mci = edac_mc_del_mc(&F3->dev); > if (!mci) > diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h > index 85aa820bc165..0844f004c90b 100644 > --- a/drivers/edac/amd64_edac.h > +++ b/drivers/edac/amd64_edac.h > @@ -126,6 +126,8 @@ > #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 > #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 > #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 > +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0 0x14D0 > +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6 0x14D6 > > /* > * Function 1 - Address Map > @@ -298,6 +300,7 @@ enum amd_families { > F17_M60H_CPUS, > F17_M70H_CPUS, > F19_CPUS, > + ALDEBARAN_GPUS, > NUM_FAMILIES, > }; > > @@ -389,6 +392,9 @@ struct amd64_pvt { > enum mem_type dram_type; > > struct amd64_umc *umc; /* UMC registers */ > + char buf[20]; A 20 char buffer in every pvt structure just so that you can sprintf into it when it is a GPU? Err, I don't think so. You can do the same thing as with the CPUs - the same string for every pvt instance.
On Fri, Aug 27, 2021 at 01:30:57PM +0200, Borislav Petkov wrote: > On Tue, Aug 24, 2021 at 12:24:37AM +0530, Naveen Krishna Chatradhi wrote: ... > > @@ -1335,6 +1392,11 @@ static void determine_memory_type(struct amd64_pvt *pvt) > > u32 dram_ctrl, dcsm; > > > > if (pvt->umc) { > > + if (pvt->is_noncpu) { > > + pvt->dram_type = MEM_HBM2; > > + return; > > + } > > I don't like this sprinkling of "if (pvt->is_noncpu)" everywhere, > at all. Please define a separate read_mc_regs_df() or so which > contains only the needed functionalty which you can carve out from > read_mc_regs(). > I like this idea. > > + > > if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) > > pvt->dram_type = MEM_LRDDR4; > > else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) > > @@ -1724,7 +1786,10 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) > > > > /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ > > for_each_umc(i) > > - channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); > > + if (pvt->is_noncpu) > > + channels += pvt->csels[i].b_cnt; > > + else > > + channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); > > > > amd64_info("MCT channel count: %d\n", channels); > > > > No, a separate gpu_early_channel_count() is needed here. There's a > reason for those function pointers getting assigned depending on family. > Good point. ... > > +/* > > + * The CPUs have one channel per UMC, So a UMC number is equivalent to a > > + * channel number. The NONCPUs have 8 channels per UMC, so the UMC number no > > + * longer works as a channel number. > > + * The channel number within a NONCPU UMC is given in MCA_IPID[15:12]. > > + * However, the IDs are split such that two UMC values go to one UMC, and > > + * the channel numbers are split in two groups of four. > > + * > > + * Refer comment on get_noncpu_umc_base() from amd64_edac.h > > + * > > + * For example, > > + * UMC0 CH[3:0] = 0x0005[3:0]000 > > + * UMC0 CH[7:4] = 0x0015[3:0]000 > > + * UMC1 CH[3:0] = 0x0025[3:0]000 > > + * UMC1 CH[7:4] = 0x0035[3:0]000 > > + */ > > +static int find_umc_channel_noncpu(struct mce *m) > > +{ > > + u8 umc = find_umc_channel(m); > > + u8 ch = ((m->ipid >> 12) & 0xf); > > + > > + return umc % 2 ? (ch + 4) : ch; > > +} > > + > > static void decode_umc_error(int node_id, struct mce *m) > > { > > u8 ecc_type = (m->status >> 45) & 0x3; > > @@ -2897,6 +3003,7 @@ static void decode_umc_error(int node_id, struct mce *m) > > struct amd64_pvt *pvt; > > struct err_info err; > > u64 sys_addr; > > + u8 df_inst_id; > > You don't need that variable and can work with err.channel just fine. > > > mci = edac_mc_find(node_id); > > if (!mci) > > @@ -2909,7 +3016,22 @@ static void decode_umc_error(int node_id, struct mce *m) > > if (m->status & MCI_STATUS_DEFERRED) > > ecc_type = 3; > > > > - err.channel = find_umc_channel(m); > > + if (pvt->is_noncpu) { > > + /* > > + * The NONCPUs have one Chip Select per UMC, so the UMC number > > + * can used as the Chip Select number. However, the UMC number > > + * is split in the ID value so it's necessary to divide by 2. > > + */ > > + err.csrow = find_umc_channel(m) / 2; > > + err.channel = find_umc_channel_noncpu(m); > > + /* On NONCPUs, instance id is calculated as below. */ > > + df_inst_id = err.csrow * 8 + err.channel; > > err.channel += err.csrow * 8; > > tadaaa! > err.channel still needs to be used in error_address_to_page_and_offset() below. So changing it here messes up what's reported to EDAC. ... > > @@ -3804,6 +3963,9 @@ static int probe_one_instance(unsigned int nid) > > struct ecc_settings *s; > > int ret; > > > > + if (!F3) > > + return 0; > > + > > ret = -ENOMEM; > > s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); > > if (!s) > > @@ -3815,6 +3977,9 @@ static int probe_one_instance(unsigned int nid) > > if (!pvt) > > goto err_settings; > > > > + if (nid >= NONCPU_NODE_INDEX) > > + pvt->is_noncpu = true; > > This is silly and error-prone. Proper detection should happen in > per_family_init() and there you should read out from the hardware > whether this is a GPU or a CPU node. > > Then, you should put an enum type in amd64_family_type which has > > { FAM_TYPE_CPU, FAM_TYPE_GPU, ... } > > etc and the places where you need to check whether it is CPU or a GPU, > test those types. > This is a good idea. But we have a global *fam_type, so this should be moved into struct amd64_pvt, if possible. Then each node can have its own fam_type. .. > > @@ -389,6 +392,9 @@ struct amd64_pvt { > > enum mem_type dram_type; > > > > struct amd64_umc *umc; /* UMC registers */ > > + char buf[20]; > > A 20 char buffer in every pvt structure just so that you can sprintf > into it when it is a GPU? Err, I don't think so. > > You can do the same thing as with the CPUs - the same string for every > pvt instance. > Fair point. I like the idea of having unique names though. Is this possible with the current EDAC framework? Or is it not worth it? Thanks, Yazen
On Wed, Sep 01, 2021 at 06:42:26PM +0000, Yazen Ghannam wrote: > err.channel still needs to be used in error_address_to_page_and_offset() > below. I think you mean __log_ecc_error(). > This is a good idea. But we have a global *fam_type, so this should be moved > into struct amd64_pvt, if possible. Then each node can have its own fam_type. per_family_init() does assign stuff to pvt members so yes, we're saying the same thing, practically. > Fair point. I like the idea of having unique names though. Is this possible > with the current EDAC framework? Or is it not worth it? We don't have unique names for the CPU nodes: [ 25.637486] EDAC MC0: Giving out device to module amd64_edac controller F17h_M30h: DEV 0000:00:18.3 (INTERRUPT) [ 25.799554] EDAC MC1: Giving out device to module amd64_edac controller F17h_M30h: DEV 0000:00:19.3 (INTERRUPT) why does it matter to have unique names for the accelerators? If you wanna differentiate them, you can dump the PCI devs like above. Just to make it clear - I'm not against it per-se - I'd just need a stronger justification for doing this than just "I like the idea".
On Wed, Sep 08, 2021 at 08:41:46PM +0200, Borislav Petkov wrote: > On Wed, Sep 01, 2021 at 06:42:26PM +0000, Yazen Ghannam wrote: > > err.channel still needs to be used in error_address_to_page_and_offset() > > below. > > I think you mean __log_ecc_error(). > Yep, you're right. > > This is a good idea. But we have a global *fam_type, so this should be moved > > into struct amd64_pvt, if possible. Then each node can have its own fam_type. > > per_family_init() does assign stuff to pvt members so yes, we're saying > the same thing, practically. > > > Fair point. I like the idea of having unique names though. Is this possible > > with the current EDAC framework? Or is it not worth it? > > We don't have unique names for the CPU nodes: > > [ 25.637486] EDAC MC0: Giving out device to module amd64_edac controller F17h_M30h: DEV 0000:00:18.3 (INTERRUPT) > [ 25.799554] EDAC MC1: Giving out device to module amd64_edac controller F17h_M30h: DEV 0000:00:19.3 (INTERRUPT) > > why does it matter to have unique names for the accelerators? > > If you wanna differentiate them, you can dump the PCI devs like above. > > Just to make it clear - I'm not against it per-se - I'd just need a > stronger justification for doing this than just "I like the idea". > There isn't a strong reason at the moment. I think it may be one less hurdle for users to go through when identifying a device. But, as you said, there are other ways to differentiate devices. Thanks, Yazen
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index f0d8f60acee1..452556adc1f9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1020,6 +1020,9 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) if (umc_en_mask == dimm_ecc_en_mask) edac_cap = EDAC_FLAG_SECDED; + + if (pvt->is_noncpu) + edac_cap = EDAC_FLAG_SECDED; } else { bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) ? 19 @@ -1078,6 +1081,9 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { int cs_mode = 0; + if (pvt->is_noncpu) + return CS_EVEN_PRIMARY | CS_ODD_PRIMARY; + if (csrow_enabled(2 * dimm, ctrl, pvt)) cs_mode |= CS_EVEN_PRIMARY; @@ -1097,6 +1103,15 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); + if (pvt->is_noncpu) { + cs_mode = f17_get_cs_mode(cs0, ctrl, pvt); + for_each_chip_select(cs0, ctrl, pvt) { + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); + amd64_info(EDAC_MC ": %d: %5dMB\n", cs0, size0); + } + return; + } + for (dimm = 0; dimm < 2; dimm++) { cs0 = dimm * 2; cs1 = dimm * 2 + 1; @@ -1121,10 +1136,15 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); + if (!pvt->is_noncpu) + edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg); edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl); edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl); + if (pvt->is_noncpu) { + edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i); + goto dimm_size; + } amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp); edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp); @@ -1149,6 +1169,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) i, 1 << ((tmp >> 4) & 0x3)); } + dimm_size: debug_display_dimm_sizes_df(pvt, i); } @@ -1218,8 +1239,13 @@ static void prep_chip_selects(struct amd64_pvt *pvt) int umc; for_each_umc(umc) { - pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = 2; + if (pvt->is_noncpu) { + pvt->csels[umc].b_cnt = 8; + pvt->csels[umc].m_cnt = 8; + } else { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = 2; + } } } else { @@ -1228,6 +1254,33 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } } +static void read_noncpu_umc_base_mask(struct amd64_pvt *pvt) +{ + u32 base_reg, mask_reg; + u32 *base, *mask; + int umc, cs; + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + base_reg = get_noncpu_umc_base(umc, cs) + UMCCH_BASE_ADDR; + base = &pvt->csels[umc].csbases[cs]; + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) { + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + } + + mask_reg = get_noncpu_umc_base(umc, cs) + UMCCH_ADDR_MASK; + mask = &pvt->csels[umc].csmasks[cs]; + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) { + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + } + } +} + static void read_umc_base_mask(struct amd64_pvt *pvt) { u32 umc_base_reg, umc_base_reg_sec; @@ -1288,8 +1341,12 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) prep_chip_selects(pvt); - if (pvt->umc) - return read_umc_base_mask(pvt); + if (pvt->umc) { + if (pvt->is_noncpu) + return read_noncpu_umc_base_mask(pvt); + else + return read_umc_base_mask(pvt); + } for_each_chip_select(cs, 0, pvt) { int reg0 = DCSB0 + (cs * 4); @@ -1335,6 +1392,11 @@ static void determine_memory_type(struct amd64_pvt *pvt) u32 dram_ctrl, dcsm; if (pvt->umc) { + if (pvt->is_noncpu) { + pvt->dram_type = MEM_HBM2; + return; + } + if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) pvt->dram_type = MEM_LRDDR4; else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) @@ -1724,7 +1786,10 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ for_each_umc(i) - channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); + if (pvt->is_noncpu) + channels += pvt->csels[i].b_cnt; + else + channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); amd64_info("MCT channel count: %d\n", channels); @@ -1865,6 +1930,12 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, u32 msb, weight, num_zero_bits; int dimm, size = 0; + if (pvt->is_noncpu) { + addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; + /* The memory channels in case of GPUs are fully populated */ + goto skip_noncpu; + } + /* No Chip Selects are enabled. */ if (!cs_mode) return size; @@ -1890,6 +1961,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, else addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + skip_noncpu: /* * The number of zero bits in the mask is equal to the number of bits * in a full mask minus the number of bits in the current mask. @@ -2635,6 +2707,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [ALDEBARAN_GPUS] = { + .ctl_name = "ALDEBARAN", + .f0_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6, + .max_mcs = 4, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -2890,6 +2972,30 @@ static int find_umc_channel(struct mce *m) return (m->ipid & GENMASK(31, 0)) >> 20; } +/* + * The CPUs have one channel per UMC, So a UMC number is equivalent to a + * channel number. The NONCPUs have 8 channels per UMC, so the UMC number no + * longer works as a channel number. + * The channel number within a NONCPU UMC is given in MCA_IPID[15:12]. + * However, the IDs are split such that two UMC values go to one UMC, and + * the channel numbers are split in two groups of four. + * + * Refer comment on get_noncpu_umc_base() from amd64_edac.h + * + * For example, + * UMC0 CH[3:0] = 0x0005[3:0]000 + * UMC0 CH[7:4] = 0x0015[3:0]000 + * UMC1 CH[3:0] = 0x0025[3:0]000 + * UMC1 CH[7:4] = 0x0035[3:0]000 + */ +static int find_umc_channel_noncpu(struct mce *m) +{ + u8 umc = find_umc_channel(m); + u8 ch = ((m->ipid >> 12) & 0xf); + + return umc % 2 ? (ch + 4) : ch; +} + static void decode_umc_error(int node_id, struct mce *m) { u8 ecc_type = (m->status >> 45) & 0x3; @@ -2897,6 +3003,7 @@ static void decode_umc_error(int node_id, struct mce *m) struct amd64_pvt *pvt; struct err_info err; u64 sys_addr; + u8 df_inst_id; mci = edac_mc_find(node_id); if (!mci) @@ -2909,7 +3016,22 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(m); + if (pvt->is_noncpu) { + /* + * The NONCPUs have one Chip Select per UMC, so the UMC number + * can used as the Chip Select number. However, the UMC number + * is split in the ID value so it's necessary to divide by 2. + */ + err.csrow = find_umc_channel(m) / 2; + err.channel = find_umc_channel_noncpu(m); + /* On NONCPUs, instance id is calculated as below. */ + df_inst_id = err.csrow * 8 + err.channel; + } else { + /* On CPUs, "Channel"="UMC Number"="DF Instance ID". */ + err.channel = find_umc_channel(m); + err.csrow = m->synd & 0x7; + df_inst_id = err.channel; + } if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; @@ -2925,9 +3047,7 @@ static void decode_umc_error(int node_id, struct mce *m) err.err_code = ERR_CHANNEL; } - err.csrow = m->synd & 0x7; - - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, df_inst_id, &sys_addr)) { err.err_code = ERR_NORM_ADDR; goto log_error; } @@ -3054,15 +3174,21 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) /* Read registers from each UMC */ for_each_umc(i) { + if (pvt->is_noncpu) + umc_base = get_noncpu_umc_base(i, 0); + else + umc_base = get_umc_base(i); - umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); - amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); + + if (!pvt->is_noncpu) { + amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); + amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi); + } } } @@ -3144,7 +3270,9 @@ static void read_mc_regs(struct amd64_pvt *pvt) determine_memory_type(pvt); edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); - determine_ecc_sym_sz(pvt); + /* ECC symbol size is not available on NONCPU nodes */ + if (!pvt->is_noncpu) + determine_ecc_sym_sz(pvt); } /* @@ -3232,15 +3360,21 @@ static int init_csrows_df(struct mem_ctl_info *mci) continue; empty = 0; - dimm = mci->csrows[cs]->channels[umc]->dimm; + if (pvt->is_noncpu) { + dimm = mci->csrows[umc]->channels[cs]->dimm; + dimm->edac_mode = EDAC_SECDED; + dimm->dtype = DEV_X16; + } else { + dimm = mci->csrows[cs]->channels[umc]->dimm; + dimm->edac_mode = edac_mode; + dimm->dtype = dev_type; + } edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, cs); dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); dimm->mtype = pvt->dram_type; - dimm->edac_mode = edac_mode; - dimm->dtype = dev_type; dimm->grain = 64; } } @@ -3505,7 +3639,9 @@ static bool ecc_enabled(struct amd64_pvt *pvt) umc_en_mask |= BIT(i); - if (umc->umc_cap_hi & UMC_ECC_ENABLED) + /* ECC is enabled by default on NONCPU nodes */ + if (pvt->is_noncpu || + (umc->umc_cap_hi & UMC_ECC_ENABLED)) ecc_en_mask |= BIT(i); } @@ -3541,6 +3677,11 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; + if (pvt->is_noncpu) { + mci->edac_ctl_cap |= EDAC_SECDED; + return; + } + for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); @@ -3571,7 +3712,11 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; - mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2; + if (pvt->is_noncpu) + mci->mtype_cap = MEM_FLAG_HBM2; + else + mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; if (pvt->umc) { @@ -3676,11 +3821,24 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M70H_CPUS]; pvt->ops = &family_types[F17_M70H_CPUS].ops; fam_type->ctl_name = "F19h_M20h"; - break; + } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { + if (pvt->is_noncpu) { + int tmp = pvt->mc_node_id - NONCPU_NODE_INDEX; + + fam_type = &family_types[ALDEBARAN_GPUS]; + pvt->ops = &family_types[ALDEBARAN_GPUS].ops; + sprintf(pvt->buf, "Aldebaran#%ddie#%d", tmp / 2, tmp % 2); + fam_type->ctl_name = pvt->buf; + } else { + fam_type = &family_types[F19_CPUS]; + pvt->ops = &family_types[F19_CPUS].ops; + fam_type->ctl_name = "F19h_M30h"; + } + } else { + fam_type = &family_types[F19_CPUS]; + pvt->ops = &family_types[F19_CPUS].ops; + family_types[F19_CPUS].ctl_name = "F19h"; } - fam_type = &family_types[F19_CPUS]; - pvt->ops = &family_types[F19_CPUS].ops; - family_types[F19_CPUS].ctl_name = "F19h"; break; default: @@ -3748,9 +3906,10 @@ static int init_one_instance(struct amd64_pvt *pvt) if (pvt->channel_count < 0) return ret; + /* Define layers for CPU and NONCPU nodes */ ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = pvt->csels[0].b_cnt; + layers[0].size = pvt->is_noncpu ? fam_type->max_mcs : pvt->csels[0].b_cnt; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; @@ -3759,7 +3918,7 @@ static int init_one_instance(struct amd64_pvt *pvt) * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. */ - layers[1].size = fam_type->max_mcs; + layers[1].size = pvt->is_noncpu ? pvt->csels[0].b_cnt : fam_type->max_mcs; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); @@ -3804,6 +3963,9 @@ static int probe_one_instance(unsigned int nid) struct ecc_settings *s; int ret; + if (!F3) + return 0; + ret = -ENOMEM; s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); if (!s) @@ -3815,6 +3977,9 @@ static int probe_one_instance(unsigned int nid) if (!pvt) goto err_settings; + if (nid >= NONCPU_NODE_INDEX) + pvt->is_noncpu = true; + pvt->mc_node_id = nid; pvt->F3 = F3; @@ -3888,6 +4053,10 @@ static void remove_one_instance(unsigned int nid) struct mem_ctl_info *mci; struct amd64_pvt *pvt; + /* Nothing to remove for the space holder entries */ + if (!F3) + return; + /* Remove from EDAC CORE tracking list */ mci = edac_mc_del_mc(&F3->dev); if (!mci) diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 85aa820bc165..0844f004c90b 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -126,6 +126,8 @@ #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0 0x14D0 +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6 0x14D6 /* * Function 1 - Address Map @@ -298,6 +300,7 @@ enum amd_families { F17_M60H_CPUS, F17_M70H_CPUS, F19_CPUS, + ALDEBARAN_GPUS, NUM_FAMILIES, }; @@ -389,6 +392,9 @@ struct amd64_pvt { enum mem_type dram_type; struct amd64_umc *umc; /* UMC registers */ + char buf[20]; + + bool is_noncpu; }; enum err_codes { @@ -410,6 +416,28 @@ struct err_info { u32 offset; }; +static inline u32 get_noncpu_umc_base(u8 umc, u8 channel) +{ + /* + * On CPUs, there is one channel per UMC, so UMC numbering equals + * channel numbering. On NONCPUs, there are eight channels per UMC, + * so the channel numbering is different from UMC numbering. + * + * On CPU nodes channels are selected in 6th nibble + * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000; + * + * On NONCPU nodes channels are selected in 3rd nibble + * HBM chX[3:0]= [Y ]5X[3:0]000; + * HBM chX[7:4]= [Y+1]5X[3:0]000 + */ + umc *= 2; + + if (channel >= 4) + umc++; + + return 0x50000 + (umc << 20) + ((channel % 4) << 12); +} + static inline u32 get_umc_base(u8 channel) { /* chY: 0xY50000 */