From patchwork Mon Feb 25 02:38:34 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: li guang X-Patchwork-Id: 2179881 Return-Path: X-Original-To: patchwork-linux-acpi@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork1.kernel.org (Postfix) with ESMTP id 87F6E40AFD for ; Mon, 25 Feb 2013 02:44:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1759464Ab3BYCnS (ORCPT ); Sun, 24 Feb 2013 21:43:18 -0500 Received: from cn.fujitsu.com ([222.73.24.84]:39335 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1759608Ab3BYCnE (ORCPT ); Sun, 24 Feb 2013 21:43:04 -0500 X-IronPort-AV: E=Sophos;i="4.84,731,1355068800"; d="scan'208";a="6760670" Received: from unknown (HELO tang.cn.fujitsu.com) ([10.167.250.3]) by song.cn.fujitsu.com with ESMTP; 25 Feb 2013 10:37:18 +0800 Received: from fnstmail02.fnst.cn.fujitsu.com (tang.cn.fujitsu.com [127.0.0.1]) by tang.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id r1P2dbMx010007; Mon, 25 Feb 2013 10:39:37 +0800 Received: from liguang.fnst.cn.fujitsu.com ([10.167.233.147]) by fnstmail02.fnst.cn.fujitsu.com (Lotus Domino Release 8.5.3) with ESMTP id 2013022510384746-376089 ; Mon, 25 Feb 2013 10:38:47 +0800 From: liguang To: tglx@linutronix.de, hpa@zytor.com, pavel@ucw.cz, rjw@sisk.pl, lv.zheng@intel.com, jarkko.sakkinen@intel.com, dave@linux.vnet.ibm.com, linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org, x86@kernel.org Cc: liguang Subject: [rebased-again][PATCH 1/4] acpi: move x86/mm/srat.c to x86/kernel/acpi/srat.c Date: Mon, 25 Feb 2013 10:38:34 +0800 Message-Id: <1361759917-5195-2-git-send-email-lig.fnst@cn.fujitsu.com> X-Mailer: git-send-email 1.7.2.5 In-Reply-To: <1361759917-5195-1-git-send-email-lig.fnst@cn.fujitsu.com> References: 
<1361759917-5195-1-git-send-email-lig.fnst@cn.fujitsu.com> X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2013/02/25 10:38:47, Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2013/02/25 10:38:50, Serialize complete at 2013/02/25 10:38:50 Sender: linux-acpi-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-acpi@vger.kernel.org The SRAT table exists only in the ACPI domain, so mm/ does not seem to be the right place for it. Reviewed-by: Yasuaki Ishimatsu Signed-off-by: liguang --- arch/x86/kernel/acpi/Makefile | 1 + arch/x86/kernel/acpi/srat.c | 284 +++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/Makefile | 1 - arch/x86/mm/srat.c | 284 ----------------------------------------- 4 files changed, 285 insertions(+), 285 deletions(-) create mode 100644 arch/x86/kernel/acpi/srat.c delete mode 100644 arch/x86/mm/srat.c diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 163b225..98cea92 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o +obj-$(CONFIG_ACPI_NUMA) += srat.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/srat.c b/arch/x86/kernel/acpi/srat.c new file mode 100644 index 0000000..459c391 --- /dev/null +++ b/arch/x86/kernel/acpi/srat.c @@ -0,0 +1,284 @@ +/* + * ACPI 3.0 based NUMA setup + * Copyright 2004 Andi Kleen, SuSE Labs. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int acpi_numa __initdata; + +static __init int setup_node(int pxm) +{ + return acpi_map_pxm_to_node(pxm); +} + +static __init void bad_srat(void) +{ + printk(KERN_ERR "SRAT: SRAT not used.\n"); + acpi_numa = -1; +} + +static __init inline int srat_disabled(void) +{ + return acpi_numa < 0; +} + +/* Callback for SLIT parsing */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + int i, j; + + for (i = 0; i < slit->locality_count; i++) + for (j = 0; j < slit->locality_count; j++) + numa_set_distance(pxm_to_node(i), pxm_to_node(j), + slit->entry[slit->locality_count * i + j]); +} + +/* Callback for Proximity Domain -> x2APIC mapping */ +void __init +acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) +{ + int pxm, node; + int apic_id; + + if (srat_disabled()) + return; + if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain; + apic_id = pa->apic_id; + if (!apic->apic_id_valid(apic_id)) { + printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", + pxm, apic_id); + return; + } + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (apic_id >= MAX_LOCAL_APIC) { + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); + return; + } + set_apicid_to_node(apic_id, node); + node_set(node, numa_nodes_parsed); + acpi_numa = 1; + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", + pxm, apic_id, node); +} + +/* Callback for Proximity Domain -> LAPIC mapping */ +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm, node; + int apic_id; + + if (srat_disabled()) + 
return; + if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) + pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (get_uv_system_type() >= UV_X2APIC) + apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; + else + apic_id = pa->apic_id; + + if (apic_id >= MAX_LOCAL_APIC) { + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); + return; + } + + set_apicid_to_node(apic_id, node); + node_set(node, numa_nodes_parsed); + acpi_numa = 1; + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", + pxm, apic_id, node); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +static inline int save_add_info(void) {return 1;} +#else +static inline int save_add_info(void) {return 0;} +#endif + +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +static void __init +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) +{ + int overlap; + unsigned long start_pfn, end_pfn; + + start_pfn = PFN_DOWN(start); + end_pfn = PFN_UP(end); + + /* + * For movablemem_map=acpi: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * hotpluggable: n y y n + * movablemem_map: |_____| |_________| + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + */ + if (hotpluggable && movablemem_map.acpi) { + insert_movablemem_map(start_pfn, end_pfn); + + /* + * numa_nodes_hotplug nodemask represents which nodes are put + * into movablemem_map.map[]. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + goto out; + } + + /* + * For movablemem_map=nn[KMG]@ss[KMG]: + * + * SRAT: |_____| |_____| |_________| |_________| ...... 
+ * node id: 0 1 1 2 + * user specified: |__| |___| + * movablemem_map: |___| |_________| |______| ...... + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + * + * NOTE: In this case, SRAT info will be ingored. + */ + overlap = movablemem_map_overlap(start_pfn, end_pfn); + if (overlap >= 0) { + /* + * If part of this range is in movablemem_map, we need to + * add the range after it to extend the range to the end + * of the node, because from the min address specified to + * the end of the node will be ZONE_MOVABLE. + */ + start_pfn = max(start_pfn, + movablemem_map.map[overlap].start_pfn); + insert_movablemem_map(start_pfn, end_pfn); + + /* + * Set the nodemask, so that if the address range on one node + * is not continuse, we can add the subsequent ranges on the + * same node into movablemem_map. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + } else { + if (node_isset(node, movablemem_map.numa_nodes_hotplug)) + /* + * Insert the range if we already have movable ranges + * on the same node. 
+ */ + insert_movablemem_map(start_pfn, end_pfn); + } +out: + return; +} +#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +static inline void +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) +{ +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + u64 start, end; + u32 hotpluggable; + int node, pxm; + + if (srat_disabled()) + goto out_err; + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) + goto out_err_bad_srat; + if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) + goto out_err; + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !save_add_info()) + goto out_err; + + start = ma->base_address; + end = start + ma->length; + pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; + + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains.\n"); + goto out_err_bad_srat; + } + + if (numa_add_memblk(node, start, end) < 0) + goto out_err_bad_srat; + + node_set(node, numa_nodes_parsed); + + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", + node, pxm, + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? "Hot Pluggable": ""); + + handle_movablemem(node, start, end, hotpluggable); + + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -1; +} + +void __init acpi_numa_arch_fixup(void) {} + +int __init x86_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret < 0) + return ret; + return srat_disabled() ? 
-EINVAL : 0; +} diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 23d8e5f..d6f3692 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -24,7 +24,6 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o obj-$(CONFIG_AMD_NUMA) += amdtopology.o -obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c deleted file mode 100644 index 459c391..0000000 --- a/arch/x86/mm/srat.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * ACPI 3.0 based NUMA setup - * Copyright 2004 Andi Kleen, SuSE Labs. - * - * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. - * - * Called from acpi_numa_init while reading the SRAT and SLIT tables. - * Assumes all memory regions belonging to a single proximity domain - * are in one chunk. Holes between them will be included in the node. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int acpi_numa __initdata; - -static __init int setup_node(int pxm) -{ - return acpi_map_pxm_to_node(pxm); -} - -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; -} - -static __init inline int srat_disabled(void) -{ - return acpi_numa < 0; -} - -/* Callback for SLIT parsing */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - int i, j; - - for (i = 0; i < slit->locality_count; i++) - for (j = 0; j < slit->locality_count; j++) - numa_set_distance(pxm_to_node(i), pxm_to_node(j), - slit->entry[slit->locality_count * i + j]); -} - -/* Callback for Proximity Domain -> x2APIC mapping */ -void __init -acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) -{ - int pxm, node; - int apic_id; - - if (srat_disabled()) - return; - if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) { - 
bad_srat(); - return; - } - if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) - return; - pxm = pa->proximity_domain; - apic_id = pa->apic_id; - if (!apic->apic_id_valid(apic_id)) { - printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n", - pxm, apic_id); - return; - } - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); - bad_srat(); - return; - } - - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } - set_apicid_to_node(apic_id, node); - node_set(node, numa_nodes_parsed); - acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", - pxm, apic_id, node); -} - -/* Callback for Proximity Domain -> LAPIC mapping */ -void __init -acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) -{ - int pxm, node; - int apic_id; - - if (srat_disabled()) - return; - if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { - bad_srat(); - return; - } - if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) - return; - pxm = pa->proximity_domain_lo; - if (acpi_srat_revision >= 2) - pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); - bad_srat(); - return; - } - - if (get_uv_system_type() >= UV_X2APIC) - apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; - else - apic_id = pa->apic_id; - - if (apic_id >= MAX_LOCAL_APIC) { - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); - return; - } - - set_apicid_to_node(apic_id, node); - node_set(node, numa_nodes_parsed); - acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", - pxm, apic_id, node); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -static inline int save_add_info(void) {return 1;} -#else -static inline int save_add_info(void) {return 0;} 
-#endif - -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -static void __init -handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) -{ - int overlap; - unsigned long start_pfn, end_pfn; - - start_pfn = PFN_DOWN(start); - end_pfn = PFN_UP(end); - - /* - * For movablemem_map=acpi: - * - * SRAT: |_____| |_____| |_________| |_________| ...... - * node id: 0 1 1 2 - * hotpluggable: n y y n - * movablemem_map: |_____| |_________| - * - * Using movablemem_map, we can prevent memblock from allocating memory - * on ZONE_MOVABLE at boot time. - */ - if (hotpluggable && movablemem_map.acpi) { - insert_movablemem_map(start_pfn, end_pfn); - - /* - * numa_nodes_hotplug nodemask represents which nodes are put - * into movablemem_map.map[]. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - goto out; - } - - /* - * For movablemem_map=nn[KMG]@ss[KMG]: - * - * SRAT: |_____| |_____| |_________| |_________| ...... - * node id: 0 1 1 2 - * user specified: |__| |___| - * movablemem_map: |___| |_________| |______| ...... - * - * Using movablemem_map, we can prevent memblock from allocating memory - * on ZONE_MOVABLE at boot time. - * - * NOTE: In this case, SRAT info will be ingored. - */ - overlap = movablemem_map_overlap(start_pfn, end_pfn); - if (overlap >= 0) { - /* - * If part of this range is in movablemem_map, we need to - * add the range after it to extend the range to the end - * of the node, because from the min address specified to - * the end of the node will be ZONE_MOVABLE. - */ - start_pfn = max(start_pfn, - movablemem_map.map[overlap].start_pfn); - insert_movablemem_map(start_pfn, end_pfn); - - /* - * Set the nodemask, so that if the address range on one node - * is not continuse, we can add the subsequent ranges on the - * same node into movablemem_map. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - } else { - if (node_isset(node, movablemem_map.numa_nodes_hotplug)) - /* - * Insert the range if we already have movable ranges - * on the same node. 
- */ - insert_movablemem_map(start_pfn, end_pfn); - } -out: - return; -} -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void -handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) -{ -} -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -int __init -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) -{ - u64 start, end; - u32 hotpluggable; - int node, pxm; - - if (srat_disabled()) - goto out_err; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) - goto out_err_bad_srat; - if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - goto out_err; - hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; - if (hotpluggable && !save_add_info()) - goto out_err; - - start = ma->base_address; - end = start + ma->length; - pxm = ma->proximity_domain; - if (acpi_srat_revision <= 1) - pxm &= 0xff; - - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - goto out_err_bad_srat; - } - - if (numa_add_memblk(node, start, end) < 0) - goto out_err_bad_srat; - - node_set(node, numa_nodes_parsed); - - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", - node, pxm, - (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? "Hot Pluggable": ""); - - handle_movablemem(node, start, end, hotpluggable); - - return 0; -out_err_bad_srat: - bad_srat(); -out_err: - return -1; -} - -void __init acpi_numa_arch_fixup(void) {} - -int __init x86_acpi_numa_init(void) -{ - int ret; - - ret = acpi_numa_init(); - if (ret < 0) - return ret; - return srat_disabled() ? -EINVAL : 0; -}