Message ID | 20211028130106.15701-2-nchatrad@amd.com (mailing list archive) |
---|---|
State | New, archived |
Series | x86/edac/amd64: Add heterogeneous node support |
On Thu, Oct 28, 2021 at 06:31:02PM +0530, Naveen Krishna Chatradhi wrote: > +/* GPU Data Fabric ID Device 24 Function 1 */ > +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1 0x14d1 > + > +/* DF18xF1 registers on Aldebaran GPU */ > +#define REG_LOCAL_NODE_TYPE_MAP 0x144 > +#define REG_RMT_NODE_TYPE_MAP 0x148 Move those defines up, along with the rest of them. While at it, you can align them all vertically. > + > +/* > + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI "Newer" is a commit message type of adjective and doesn't belong in permanent comments because when years pass, they won't be "newer" anymore. IOW, you can simply drop it here. > + * links, comes with registers to gather local and remote node type map info. "come" > + * > + * "Local Node Type" refers to nodes with the same type as that from which the > + * register is read, and "Remote Node Type" refers to nodes with a different type. This sure sounds weird. With my simplistic thinking I'd assume "local" is the CPU and "remote" is the GPU... > + * This function, reads the registers from GPU DF function 1. > + * Hence, local nodes are GPU and remote nodes are CPUs. > + */ > +static int amd_get_node_map(void) > +{ > + struct amd_node_map *nodemap; > + struct pci_dev *pdev; > + u32 tmp; > + > + pdev = pci_get_device(PCI_VENDOR_ID_AMD, > + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL); > + if (!pdev) { > + pr_debug("DF Func1 PCI device not found on this node.\n"); > + return -ENODEV; > + } > + > + nodemap = kmalloc(sizeof(*nodemap), GFP_KERNEL); You allocate a whopping 4 bytes? Just do struct amd_node_map nodemap; in the nb info descriptor. I still need to see whether those node maps and functions you're adding and exporting even make sense but that will happen later. > + if (!nodemap) > + return -ENOMEM; > + > + pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); Check retval. > + nodemap->gpu_node_start_id = tmp & 0xFFF; > + > + pci_read_config_dword(pdev, REG_RMT_NODE_TYPE_MAP, &tmp); Ditto. > + nodemap->cpu_node_count = tmp >> 16 & 0xFFF; > + > + amd_northbridges.nodemap = nodemap; > + return 0; > +} > + > static struct pci_dev *next_northbridge(struct pci_dev *dev, > const struct pci_device_id *ids) > { > @@ -230,6 +297,27 @@ int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) > } > EXPORT_SYMBOL_GPL(amd_df_indirect_read); > > +struct pci_dev *get_root_devs(struct pci_dev *root, static > + const struct pci_device_id *root_ids, > + u16 roots_per_misc) > +{ > + u16 j; > + > + /* > + * If there are more PCI root devices than data fabric/ > + * system management network interfaces, then the (N) > + * PCI roots per DF/SMN interface are functionally the > + * same (for DF/SMN access) and N-1 are redundant. N-1 > + * PCI roots should be skipped per DF/SMN interface so > + * the following DF/SMN interfaces get mapped to > + * correct PCI roots. 
> + */ > + for (j = 0; j < roots_per_misc; j++) > + root = next_northbridge(root, root_ids); > + > + return root; > +} > + > int amd_cache_northbridges(void) > { > const struct pci_device_id *misc_ids = amd_nb_misc_ids; > @@ -237,10 +325,10 @@ int amd_cache_northbridges(void) > const struct pci_device_id *root_ids = amd_root_ids; > struct pci_dev *root, *misc, *link; > struct amd_northbridge *nb; > - u16 roots_per_misc = 0; > - u16 misc_count = 0; > - u16 root_count = 0; > - u16 i, j; > + u16 roots_per_misc = 0, gpu_roots_per_misc = 0; > + u16 misc_count = 0, gpu_misc_count = 0; > + u16 root_count = 0, gpu_root_count = 0; > + u16 i; > > if (amd_northbridges.num) > return 0; > @@ -252,15 +340,23 @@ int amd_cache_northbridges(void) > } > > misc = NULL; > - while ((misc = next_northbridge(misc, misc_ids)) != NULL) > - misc_count++; > + while ((misc = next_northbridge(misc, misc_ids)) != NULL) { Just remove that redundant "!= NULL" at the end, while at it. > + if (misc->device == PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3) > + gpu_misc_count++; > + else > + misc_count++; > + } > > if (!misc_count) > return -ENODEV; > > root = NULL; > - while ((root = next_northbridge(root, root_ids)) != NULL) > - root_count++; > + while ((root = next_northbridge(root, root_ids)) != NULL) { > + if (root->device == PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT) > + gpu_root_count++; > + else > + root_count++; > + } > > if (root_count) { > roots_per_misc = root_count / misc_count; > @@ -275,33 +371,37 @@ int amd_cache_northbridges(void) > } > } > > - nb = kcalloc(misc_count, sizeof(struct amd_northbridge), GFP_KERNEL); > + /* > + * The number of miscs, roots and roots_per_misc might vary on different > + * nodes of a heterogeneous system. > + * Calculate roots_per_misc accordingly in order to skip the redundant > + * roots and map the DF/SMN interfaces to correct PCI roots. > + */ Reflow that comment so that it is a block. > + if (gpu_root_count && gpu_misc_count) { > + int ret = amd_get_node_map(); > + ^ Superfluous newline. > + if (ret) > + return ret; > + > + gpu_roots_per_misc = gpu_root_count / gpu_misc_count; > + } > +
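A minimal sketch of how the two review comments above might be folded in - the config-space reads checked and the four-byte kmalloc replaced by a node map embedded in the nb info descriptor. The embedded 'struct amd_node_map nodemap' member and the error handling are illustrative assumptions, not code from the posted series:

static int amd_get_node_map(void)
{
        /* Assumes 'struct amd_node_map nodemap;' lives in amd_northbridge_info. */
        struct amd_node_map *nodemap = &amd_northbridges.nodemap;
        struct pci_dev *pdev;
        u32 tmp;
        int ret = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_AMD,
                              PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL);
        if (!pdev) {
                pr_debug("DF Func1 PCI device not found on this node.\n");
                return -ENODEV;
        }

        /* Both config reads checked, as asked for above. */
        if (pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp)) {
                ret = -ENODEV;
                goto out;
        }
        nodemap->gpu_node_start_id = tmp & 0xFFF;

        if (pci_read_config_dword(pdev, REG_RMT_NODE_TYPE_MAP, &tmp)) {
                ret = -ENODEV;
                goto out;
        }
        nodemap->cpu_node_count = tmp >> 16 & 0xFFF;

out:
        pci_dev_put(pdev);
        return ret;
}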
On Thu, Oct 28, 2021 at 06:31:02PM +0530, Naveen Krishna Chatradhi wrote: Staring at this more... > +/* > + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI > + * links, comes with registers to gather local and remote node type map info. > + * > + * "Local Node Type" refers to nodes with the same type as that from which the > + * register is read, and "Remote Node Type" refers to nodes with a different type. > + * > + * This function, reads the registers from GPU DF function 1. > + * Hence, local nodes are GPU and remote nodes are CPUs. > + */ > +static int amd_get_node_map(void) ... so this is a generic function name... > +{ > + struct amd_node_map *nodemap; > + struct pci_dev *pdev; > + u32 tmp; > + > + pdev = pci_get_device(PCI_VENDOR_ID_AMD, > + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL); ... but this here is trying to get the Aldebaran PCI device function. So what happens if in the future, the GPU is a different one and it gets RAS functionality and PCI device functions too? You'd probably need to add that new GPU support too. And then looking at that patch again, see how this new code is bolted on and sure, it all is made to work, but it is strenuous and you have to always pay attention to what type of devices you're dealing with. And the next patch does: ... if (bank_type == SMCA_UMC_V2) { /* do UMC v2 special stuff here. */ which begs the question: wouldn't this GPU PCI devices enumeration be a lot cleaner if it were separate? I.e., in amd_nb.c you'd have init_amd_nbs: amd_cache_northbridges(); amd_cache_gart(); amd_cache_gpu_devices(); and in this last one you do your enumeration. Completely separate data structures and all. Adding a new device support would then be trivial. And then looking at the next patch again, you have: + } else if (bank_type == SMCA_UMC_V2) { + /* + * SMCA_UMC_V2 exists on GPU nodes, extract the node id + * from register MCA_IPID[47:44](InstanceIdHi). + * The InstanceIdHi field represents the instance ID of the GPU. + * Which needs to be mapped to a value used by Linux, + * where GPU nodes are simply numerically after the CPU nodes. + */ + node_id = ((m->ipid >> 44) & 0xF) - + amd_gpu_node_start_id() + amd_cpu_node_count(); where instead of exporting those functions and having the caller do the calculations, you'd have a function in amd_nb.c which is called amd_get_gpu_node_id(unsigned long ipid) which will use those separate data structures mentioned above and give you the node id. And those GPU node IDs are placed numerically after the CPU nodes so your code doesn't need to do anything special - just read out registers and cache them. And you don't need those exports either - it is all nicely encapsulated and a single function is used to get the callers what they wanna know. Hmmm?
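As a rough illustration of the encapsulation suggested here, the single helper in amd_nb.c might look like the sketch below. The IPID[47:44] (InstanceIdHi) extraction is taken from the quoted hunk and the function name and signature are the ones proposed above; the cached gpu_node_start_id/cpu_node_count fields are assumed to come from whatever the separate GPU data structures end up providing:

/* Sketch: translate MCA_IPID of a GPU UMC error into a Linux node ID. */
u16 amd_get_gpu_node_id(unsigned long ipid)
{
        u8 instance_id_hi = (ipid >> 44) & 0xF;

        /* GPU nodes are numbered after the CPU nodes. */
        return instance_id_hi - amd_northbridges.nodemap.gpu_node_start_id +
               amd_northbridges.nodemap.cpu_node_count;
}
EXPORT_SYMBOL_GPL(amd_get_gpu_node_id);

The MCE/EDAC side would then reduce to a single call, something like node_id = amd_get_gpu_node_id(m->ipid), with no start-id/count exports.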
Hi Boris, On 11/2/2021 11:33 PM, Borislav Petkov wrote: > [CAUTION: External Email] > > On Thu, Oct 28, 2021 at 06:31:02PM +0530, Naveen Krishna Chatradhi wrote: > > Staring at this more... Thanks for taking the time. > >> +/* >> + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI >> + * links, comes with registers to gather local and remote node type map info. >> + * >> + * "Local Node Type" refers to nodes with the same type as that from which the >> + * register is read, and "Remote Node Type" refers to nodes with a different type. >> + * >> + * This function, reads the registers from GPU DF function 1. >> + * Hence, local nodes are GPU and remote nodes are CPUs. >> + */ >> +static int amd_get_node_map(void) > ... so this is a generic function name... > >> +{ >> + struct amd_node_map *nodemap; >> + struct pci_dev *pdev; >> + u32 tmp; >> + >> + pdev = pci_get_device(PCI_VENDOR_ID_AMD, >> + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL); > ... but this here is trying to get the Aldebaran PCI device function. I know, this is confusion. we will try to give a meaning for definition here. > > So what happens if in the future, the GPU is a different one and it > gets RAS functionality and PCI device functions too? You'd probably need > to add that new GPU support too. Yes, might happen > > And then looking at that patch again, see how this new code is bolted on > and sure, it all is made to work, but it is strenuous and you have to > always pay attention to what type of devices you're dealing with. > > And the next patch does: > > ... if (bank_type == SMCA_UMC_V2) { > > /* do UMC v2 special stuff here. */ > > which begs the question: wouldn't this GPU PCI devices enumeration be a > lot cleaner if it were separate? > > I.e., in amd_nb.c you'd have > > init_amd_nbs: > > amd_cache_northbridges(); > amd_cache_gart(); > amd_cache_gpu_devices(); Agreed. however, a slight modification to the suggestion Instead of modifying the init_amd_nbs() How about, defining a new struct +struct system_topology { + const struct pci_device_id *misc_ids; + const struct pci_device_id *link_ids; + const struct pci_device_id *root_ids; + u16 roots_per_misc; + u16 misc_count; + u16 root_count; +}; and modifying the amd_cache_northbridges() to +int amd_cache_northbridges(void) +{ + struct system_toplogy topo; + int ret; + + if (amd_northbridges.num) + return 0; + + ret = amd_cpu_nbs(&topo); + printk("==> misc:%d\n", ret); + + if (look_for_remote_nodes()) { + ret = amd_gpu_nbs(&topo); + printk("==> gpu_misc:%d\n", ret); + } + + get_next_northbridges(&topo); This way, creating appropriate number MCs under EDAC and existing exported APIs can remain the same. Let me know your thoughts on this. I can send an updated version with your comments addressed. > > and in this last one you do your enumeration. Completely separate data > structures and all. Adding a new device support would then be trivial. > > And then looking at the next patch again, you have: > > + } else if (bank_type == SMCA_UMC_V2) { > + /* > + * SMCA_UMC_V2 exists on GPU nodes, extract the node id > + * from register MCA_IPID[47:44](InstanceIdHi). > + * The InstanceIdHi field represents the instance ID of the GPU. > + * Which needs to be mapped to a value used by Linux, > + * where GPU nodes are simply numerically after the CPU nodes. 
> + */ > + node_id = ((m->ipid >> 44) & 0xF) - > + amd_gpu_node_start_id() + amd_cpu_node_count(); > > where instead of exporting those functions and having the caller do the > calculations, you'd have a function in amd_nb.c which is called > > amd_get_gpu_node_id(unsigned long ipid) > > which will use those separate data structures mentioned above and give > you the node id. Sure, we can modify this way. > > And those GPU node IDs are placed numerically after the CPU nodes so > your code doesn't need to do anything special - just read out registers > and cache them. > > And you don't need those exports either - it is all nicely encapsulated > and a single function is used to get the callers what they wanna know. Got it, thank you. > > Hmmm? > > -- > Regards/Gruss, > Boris. > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpeople.kernel.org%2Ftglx%2Fnotes-about-netiquette&data=04%7C01%7CNaveenKrishna.Chatradhi%40amd.com%7Cdd5b3586178441f4886808d99e2b1ef3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637714730331703852%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=oXDojOFqVVhxn4P1tgwLycaJgc2rvwo8EoUj3i971Mw%3D&reserved=0
Hi Boris, On 11/2/2021 11:33 PM, Borislav Petkov wrote: > [CAUTION: External Email] > > On Thu, Oct 28, 2021 at 06:31:02PM +0530, Naveen Krishna Chatradhi wrote: > > Staring at this more... Thanks for taking the time. >> +/* >> + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI >> + * links, comes with registers to gather local and remote node type map info. >> + * >> + * "Local Node Type" refers to nodes with the same type as that from which the >> + * register is read, and "Remote Node Type" refers to nodes with a different type. >> + * >> + * This function, reads the registers from GPU DF function 1. >> + * Hence, local nodes are GPU and remote nodes are CPUs. >> + */ >> +static int amd_get_node_map(void) > ... so this is a generic function name... >> +{ >> + struct amd_node_map *nodemap; >> + struct pci_dev *pdev; >> + u32 tmp; >> + >> + pdev = pci_get_device(PCI_VENDOR_ID_AMD, >> + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL); > ... but this here is trying to get the Aldebaran PCI device function. I know, this is confusion. we will try to give a meaning for definition here. > So what happens if in the future, the GPU is a different one and it > gets RAS functionality and PCI device functions too? You'd probably need > to add that new GPU support too. Yes, might happen > And then looking at that patch again, see how this new code is bolted on > and sure, it all is made to work, but it is strenuous and you have to > always pay attention to what type of devices you're dealing with. > > And the next patch does: > > ... if (bank_type == SMCA_UMC_V2) { > > /* do UMC v2 special stuff here. */ > > which begs the question: wouldn't this GPU PCI devices enumeration be a > lot cleaner if it were separate? > > I.e., in amd_nb.c you'd have > > init_amd_nbs: > > amd_cache_northbridges(); > amd_cache_gart(); > amd_cache_gpu_devices(); Agreed. however, a slight modification to the suggestion Instead of modiying the init_amd_nbs() How about, modifying the amd_cache_northbridges() to define a call amd_prep_cpu_nbs() > and in this last one you do your enumeration. Completely separate data > structures and all. Adding a new device support would then be trivial. > > And then looking at the next patch again, you have: > > + } else if (bank_type == SMCA_UMC_V2) { > + /* > + * SMCA_UMC_V2 exists on GPU nodes, extract the node id > + * from register MCA_IPID[47:44](InstanceIdHi). > + * The InstanceIdHi field represents the instance ID of the GPU. > + * Which needs to be mapped to a value used by Linux, > + * where GPU nodes are simply numerically after the CPU nodes. > + */ > + node_id = ((m->ipid >> 44) & 0xF) - > + amd_gpu_node_start_id() + amd_cpu_node_count(); > > where instead of exporting those functions and having the caller do the > calculations, you'd have a function in amd_nb.c which is called > > amd_get_gpu_node_id(unsigned long ipid) > > which will use those separate data structures mentioned above and give > you the node id. > > And those GPU node IDs are placed numerically after the CPU nodes so > your code doesn't need to do anything special - just read out registers > and cache them. > > And you don't need those exports either - it is all nicely encapsulated > and a single function is used to get the callers what they wanna know. > > Hmmm? > > -- > Regards/Gruss, > Boris. 
> > https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Nov 04, 2021 at 06:48:29PM +0530, Chatradhi, Naveen Krishna wrote:
> I know, this is confusion. we will try to give a meaning for definition
> here.

Well, not that - you will need to keep adding PCI device IDs for the
future GPUs supporting this stuff.

> How about, defining a new struct
>
> +struct system_topology {
> + const struct pci_device_id *misc_ids;
> + const struct pci_device_id *link_ids;
> + const struct pci_device_id *root_ids;
> + u16 roots_per_misc;
> + u16 misc_count;
> + u16 root_count;
> +};

Well, how does having a single struct help make things easier?

IOW, if you use accessors to get the information you need, it doesn't
really matter what the underlying organization of the data is. And if
it helps to keep 'em separate because stuff is simple altogether, then,
they should be separate.

So, before you ask "How about", think of answering the question "Why
should it be done this way? What are the advantages?"

> This way, creating appropriate number MCs under EDAC and existing exported
> APIs can remain the same.

Why does that matter?

Also, have you verified in what order the init_amd_nbs() fs initcall and
amd64_edac_init() get executed?

I'm going to venture a pretty sure guess that the initcall runs first
and that amd_cache_northbridges() call in amd64_edac_init() is probably
not even needed anymore...

Thx.
Hi Boris,

On 11/8/2021 7:04 PM, Borislav Petkov wrote:
> On Thu, Nov 04, 2021 at 06:48:29PM +0530, Chatradhi, Naveen Krishna wrote:
>> I know, this is confusion. we will try to give a meaning for definition
>> here.
> Well, not that - you will need to keep adding PCI device IDs for the
> future GPUs supporting this stuff.

Creating a pci_device_id list for GPUs will be needed.

>> How about, defining a new struct
>>
>> +struct system_topology {
>> + const struct pci_device_id *misc_ids;
>> + const struct pci_device_id *link_ids;
>> + const struct pci_device_id *root_ids;
>> + u16 roots_per_misc;
>> + u16 misc_count;
>> + u16 root_count;
>> +};
> Well, how does having a single struct help make things easier?

Northbridges on CPUs and GPUs can be described using the elements in
the above structure.

> IOW, if you use accessors to get the information you need, it doesn't
> really matter what the underlying organization of the data is. And if
> it helps to keep 'em separate because stuff is simple altogether, then,
> they should be separate.

I thought organizing the data in a structure would simplify the
initialization of cpus and gpus.

> So, before you ask "How about", think of answering the question "Why
> should it be done this way? What are the advantages?"

I will modify the patch to enumerate gpu northbridge info only if there
are gpu nodes with pci_device to access the node_map registers.

>> This way, creating appropriate number MCs under EDAC and existing exported
>> APIs can remain the same.
> Why does that matter?
>
> Also, have you verified in what order the init_amd_nbs() fs initcall and
> amd64_edac_init() get executed?
>
> I'm going to venture a pretty sure guess that the initcall runs first
> and that amd_cache_northbridges() call in amd64_edac_init() is probably
> not even needed anymore...

Yes, fs_initcall comes before module_init (which in turn is
device_initcall, level 6). We can replace the amd_cache_northbridges()
call in amd64_edac_init() with if (!amd_nb_num()).

> Thx.
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Nov 08, 2021 at 10:23:49PM +0530, Chatradhi, Naveen Krishna wrote:
> Northbridges on CPUs and GPUs can be described using the elements in the
> above structure.

If you're going to describe *northbridges*, then your struct cannot be
called system_topology...

> I thought organizing the data in a structure would simplify the
> initialization of cpus and gpus.

Ehh, did you even read my mail where I tried to explain that sprinkling

	if (gpu)
		this
	else
		that

all over amd_cache_northbridges() is not proper design?

;-\

> I will modify the patch to enumerate gpu northbridge info only if there are
> gpu nodes with pci_device to access the node_map registers.

Why would you do that? What's the advantage?

How about you answer my questions first so that we agree on the design
first before you go and do things?

Hmm.
Hi Boris,

On 11/9/2021 12:33 AM, Borislav Petkov wrote:
> On Mon, Nov 08, 2021 at 10:23:49PM +0530, Chatradhi, Naveen Krishna wrote:
>> Northbridges on CPUs and GPUs can be described using the elements in the
>> above structure.
> If you're going to describe *northbridges*, then your struct cannot be called
> system_topology...
>
>> I thought organizing the data in a structure would simplify the
>> initialization of cpus and gpus.
> Ehh, did you even read my mail where I tried to explain that sprinkling
>
> 	if (gpu)
> 		this
> 	else
> 		that
>
> all over amd_cache_northbridges() is not proper design?
>
> ;-\
>
>> I will modify the patch to enumerate gpu northbridge info only if there are
>> gpu nodes with pci_device to access the node_map registers.
> Why would you do that? What's the advantage?
>
> How about you answer my questions first so that we agree on the design
> first before you go and do things?

I was trying to handle both cpu and gpu northbridge enumeration in the
amd_cache_northbridges() itself by reusing the existing structures and
APIs.

Should have seen this through more clearly. As, this is working well for
the following reasons.

a. Allocating the amd_northbridges.nb after identifying both the cpu and gpu
   misc devices, would extend node_to_amd_nb(node) for both cpu and gpu nodes.
   It is used extensively in this module. However, the roots_per_misc value
   is different in case of cpus and gpus and that needed to be handled
   separately.

b. amd_nb_num(void) is used by other modules in the kernel, returning the
   total count of CPU and GPU northbridges would break the existing code.

I understood your point now.

When we create separate functions for caching cpu and gpu devices, is it
okay to create "struct amd_gpu_nb_info" with the following fields

a. gpu_num;
b. struct amd_northbridge *gpu_nb;
c. gpu_node_start_id;

While, amd_nb_num(), continues to return number of cpu NBs
Add new API amd_gpu_nb_num(), return number of gpu NBs

and modify the node_to_amd_nb(node) to extend the same behavior for gpu
devices also

Regards,
naveenk

> Hmm.
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Nov 09, 2021 at 05:00:11PM +0530, Chatradhi, Naveen Krishna wrote:
> I was trying to handle both cpu and gpu northbridge enumeration in the
> amd_cache_northbridges() itself by reusing the existing structures and APIs.
>
> Should have seen this through more clearly. As, this is working well for the
> following reasons.

Good, that's exactly what I meant! :)

> a. Allocating the amd_northbridges.nb after identifying both the cpu and gpu
> misc devices, would extend node_to_amd_nb(node) for both cpu and gpu nodes.

Well, there's a reason those things are functions - so that you can do
the necessary computation inside them and when stuff needs to change,
users don't have to. That's why amd_northbridges is static and only the
functions are exported.

So if you want to make sure node_to_amd_nb() works for GPU nodes too,
you simply have to look at the right "container" so to speak, depending
on the number @node passed in as an argument and look it up in the
proper array of pointers:

	[ CPU_NB0, CPU_NB1, ..., CPU_NB-N]
	[ GPU_NB0, ... ]

you get the idea.

> It is used extensively in this module. However, the roots_per_misc value
> is different in case of cpus and gpus and that needed to be handled
> separately.
>
> b. amd_nb_num(void) is used by other modules in the kernel, returning the
> total count of CPU and GPU northbridges would break the existing code.

amd64_edac is using it too, to know how many driver instances to
allocate. The other users - amd_gart_64.c and amd64-agp.c - are old
stuff so they most certainly don't need to get the number of GPU nodes
too.

> I understood your point now.

Good.

> When we create separate functions for caching cpu and gpu devices, is it
> okay to create "struct amd_gpu_nb_info" with the following fields
>
> a. gpu_num;
> b. struct amd_northbridge *gpu_nb;
> c. gpu_node_start_id;

Makes sense. You need to put in it anything that describes the GPU NBs
on the system and that other code would use.

> While, amd_nb_num(), continues to return number of cpu NBs
> Add new API amd_gpu_nb_num(), return number of gpu NBs
>
> and modify the node_to_amd_nb(node) to extend the same behavior for gpu
> devices also

Yap, exactly. And since amd64_edac is going to need the full NB count,
you can use there both:

	num_nodes = amd_nb_num() + amd_gpu_nb_num();

...

Easy, peasy. :-)

Thanks!
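Tying the above together, a sketch of what the separate GPU bookkeeping and the extended lookup could look like. The struct fields are the ones proposed in this thread; the amd_gpu_nbs variable name and the accessor bodies are illustrative assumptions, not a posted patch:

/* GPU northbridge bookkeeping, kept separate from amd_northbridges. */
struct amd_gpu_nb_info {
        u16 gpu_num;
        struct amd_northbridge *gpu_nb;
        u16 gpu_node_start_id;
};

static struct amd_gpu_nb_info amd_gpu_nbs;

/* CPU NB count only, unchanged for existing users. */
u16 amd_nb_num(void)
{
        return amd_northbridges.num;
}

/* New accessor: number of GPU NBs. */
u16 amd_gpu_nb_num(void)
{
        return amd_gpu_nbs.gpu_num;
}

/* GPU nodes come numerically after the CPU nodes, so pick the right array. */
struct amd_northbridge *node_to_amd_nb(int node)
{
        if (node < amd_northbridges.num)
                return &amd_northbridges.nb[node];

        node -= amd_northbridges.num;
        return node < amd_gpu_nbs.gpu_num ? &amd_gpu_nbs.gpu_nb[node] : NULL;
}

amd64_edac would then size its instances with num_nodes = amd_nb_num() + amd_gpu_nb_num(), exactly as above.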
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 455066a06f60..a78d088dae40 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -68,10 +68,17 @@ struct amd_northbridge { struct threshold_bank *bank4; }; +/* heterogeneous system node type map variables */ +struct amd_node_map { + u16 gpu_node_start_id; + u16 cpu_node_count; +}; + struct amd_northbridge_info { u16 num; u64 flags; struct amd_northbridge *nb; + struct amd_node_map *nodemap; }; #define AMD_NB_GART BIT(0) @@ -83,6 +90,8 @@ struct amd_northbridge_info { u16 amd_nb_num(void); bool amd_nb_has_feature(unsigned int feature); struct amd_northbridge *node_to_amd_nb(int node); +u16 amd_gpu_node_start_id(void); +u16 amd_cpu_node_count(void); static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) { diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c92c9c774c0e..199d9d79edfb 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -19,6 +19,7 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 #define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 +#define PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT 0x14bb #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 @@ -28,6 +29,7 @@ #define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F4 0x14d4 /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -40,6 +42,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT) }, {} }; @@ -63,6 +66,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3) }, {} }; @@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F4) }, {} }; @@ -126,6 +131,68 @@ struct amd_northbridge *node_to_amd_nb(int node) } EXPORT_SYMBOL_GPL(node_to_amd_nb); +/* + * GPU start index and CPU count values on an heterogeneous system, + * these values will be used by the AMD EDAC and MCE modules. + */ +u16 amd_gpu_node_start_id(void) +{ + return (amd_northbridges.nodemap) ? + amd_northbridges.nodemap->gpu_node_start_id : 0; +} +EXPORT_SYMBOL_GPL(amd_gpu_node_start_id); + +u16 amd_cpu_node_count(void) +{ + return (amd_northbridges.nodemap) ? 
+ amd_northbridges.nodemap->cpu_node_count : amd_northbridges.num; +} +EXPORT_SYMBOL_GPL(amd_cpu_node_count); + +/* GPU Data Fabric ID Device 24 Function 1 */ +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1 0x14d1 + +/* DF18xF1 registers on Aldebaran GPU */ +#define REG_LOCAL_NODE_TYPE_MAP 0x144 +#define REG_RMT_NODE_TYPE_MAP 0x148 + +/* + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI + * links, comes with registers to gather local and remote node type map info. + * + * "Local Node Type" refers to nodes with the same type as that from which the + * register is read, and "Remote Node Type" refers to nodes with a different type. + * + * This function, reads the registers from GPU DF function 1. + * Hence, local nodes are GPU and remote nodes are CPUs. + */ +static int amd_get_node_map(void) +{ + struct amd_node_map *nodemap; + struct pci_dev *pdev; + u32 tmp; + + pdev = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL); + if (!pdev) { + pr_debug("DF Func1 PCI device not found on this node.\n"); + return -ENODEV; + } + + nodemap = kmalloc(sizeof(*nodemap), GFP_KERNEL); + if (!nodemap) + return -ENOMEM; + + pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); + nodemap->gpu_node_start_id = tmp & 0xFFF; + + pci_read_config_dword(pdev, REG_RMT_NODE_TYPE_MAP, &tmp); + nodemap->cpu_node_count = tmp >> 16 & 0xFFF; + + amd_northbridges.nodemap = nodemap; + return 0; +} + static struct pci_dev *next_northbridge(struct pci_dev *dev, const struct pci_device_id *ids) { @@ -230,6 +297,27 @@ int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) } EXPORT_SYMBOL_GPL(amd_df_indirect_read); +struct pci_dev *get_root_devs(struct pci_dev *root, + const struct pci_device_id *root_ids, + u16 roots_per_misc) +{ + u16 j; + + /* + * If there are more PCI root devices than data fabric/ + * system management network interfaces, then the (N) + * PCI roots per DF/SMN interface are functionally the + * same (for DF/SMN access) and N-1 are redundant. N-1 + * PCI roots should be skipped per DF/SMN interface so + * the following DF/SMN interfaces get mapped to + * correct PCI roots. 
+ */ + for (j = 0; j < roots_per_misc; j++) + root = next_northbridge(root, root_ids); + + return root; +} + int amd_cache_northbridges(void) { const struct pci_device_id *misc_ids = amd_nb_misc_ids; @@ -237,10 +325,10 @@ int amd_cache_northbridges(void) const struct pci_device_id *root_ids = amd_root_ids; struct pci_dev *root, *misc, *link; struct amd_northbridge *nb; - u16 roots_per_misc = 0; - u16 misc_count = 0; - u16 root_count = 0; - u16 i, j; + u16 roots_per_misc = 0, gpu_roots_per_misc = 0; + u16 misc_count = 0, gpu_misc_count = 0; + u16 root_count = 0, gpu_root_count = 0; + u16 i; if (amd_northbridges.num) return 0; @@ -252,15 +340,23 @@ int amd_cache_northbridges(void) } misc = NULL; - while ((misc = next_northbridge(misc, misc_ids)) != NULL) - misc_count++; + while ((misc = next_northbridge(misc, misc_ids)) != NULL) { + if (misc->device == PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3) + gpu_misc_count++; + else + misc_count++; + } if (!misc_count) return -ENODEV; root = NULL; - while ((root = next_northbridge(root, root_ids)) != NULL) - root_count++; + while ((root = next_northbridge(root, root_ids)) != NULL) { + if (root->device == PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT) + gpu_root_count++; + else + root_count++; + } if (root_count) { roots_per_misc = root_count / misc_count; @@ -275,33 +371,37 @@ int amd_cache_northbridges(void) } } - nb = kcalloc(misc_count, sizeof(struct amd_northbridge), GFP_KERNEL); + /* + * The number of miscs, roots and roots_per_misc might vary on different + * nodes of a heterogeneous system. + * Calculate roots_per_misc accordingly in order to skip the redundant + * roots and map the DF/SMN interfaces to correct PCI roots. + */ + if (gpu_root_count && gpu_misc_count) { + int ret = amd_get_node_map(); + + if (ret) + return ret; + + gpu_roots_per_misc = gpu_root_count / gpu_misc_count; + } + + amd_northbridges.num = misc_count + gpu_misc_count; + nb = kcalloc(amd_northbridges.num, sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) return -ENOMEM; amd_northbridges.nb = nb; - amd_northbridges.num = misc_count; link = misc = root = NULL; for (i = 0; i < amd_northbridges.num; i++) { + u16 misc_roots = i < misc_count ? roots_per_misc : gpu_roots_per_misc; node_to_amd_nb(i)->root = root = - next_northbridge(root, root_ids); + get_root_devs(root, root_ids, misc_roots); node_to_amd_nb(i)->misc = misc = next_northbridge(misc, misc_ids); node_to_amd_nb(i)->link = link = next_northbridge(link, link_ids); - - /* - * If there are more PCI root devices than data fabric/ - * system management network interfaces, then the (N) - * PCI roots per DF/SMN interface are functionally the - * same (for DF/SMN access) and N-1 are redundant. N-1 - * PCI roots should be skipped per DF/SMN interface so - * the following DF/SMN interfaces get mapped to - * correct PCI roots. - */ - for (j = 1; j < roots_per_misc; j++) - root = next_northbridge(root, root_ids); } if (amd_gart_present()) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 011f2f1ea5bb..b3a0ec29dbd6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -557,6 +557,7 @@ #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3 0x14d3 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001