diff options
| author | holt@sgi.com <holt@sgi.com> | 2008-04-03 15:17:13 -0500 | 
|---|---|---|
| committer | Tony Luck <tony.luck@intel.com> | 2008-04-08 13:51:35 -0700 | 
| commit | 2c6e6db41f01b6b4eb98809350827c9678996698 (patch) | |
| tree | 00438344c0ad599c1301db2abe32a4c2ee89b607 | |
| parent | 41bd26d67c41e325c6b9e56aadfe9dad8af9a565 (diff) | |
| download | olio-linux-3.10-2c6e6db41f01b6b4eb98809350827c9678996698.tar.xz olio-linux-3.10-2c6e6db41f01b6b4eb98809350827c9678996698.zip  | |
[IA64] Minimize per_cpu reservations.
This patch significantly shrinks boot memory allocation on ia64.
It does this by not allocating per_cpu areas for cpus that can never
exist.
In the case where ACPI does not have any NUMA node description of the
cpus, I defaulted to assigning the first 32 cpus round-robin on the known
nodes. For the !CONFIG_ACPI case, I used for_each_possible_cpu().
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
| -rw-r--r-- | arch/ia64/kernel/acpi.c | 4 | ||||
| -rw-r--r-- | arch/ia64/kernel/numa.c | 2 | ||||
| -rw-r--r-- | arch/ia64/kernel/setup.c | 2 | ||||
| -rw-r--r-- | arch/ia64/mm/discontig.c | 12 | ||||
| -rw-r--r-- | arch/ia64/mm/numa.c | 4 | ||||
| -rw-r--r-- | include/asm-ia64/acpi.h | 33 | ||||
| -rw-r--r-- | include/asm-ia64/numa.h | 2 | 
7 files changed, 48 insertions, 11 deletions
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 78f28d825f3..c7467f863c7 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];  #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))  #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))  static struct acpi_table_slit __initdata *slit_table; +cpumask_t early_cpu_possible_map = CPU_MASK_NONE;  static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)  { @@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)  	    (pa->apic_id << 8) | (pa->local_sapic_eid);  	/* nid should be overridden as logical node id later */  	node_cpuid[srat_num_cpus].nid = pxm; +	cpu_set(srat_num_cpus, early_cpu_possible_map);  	srat_num_cpus++;  } @@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)  	}  	/* set logical node id in cpu structure */ -	for (i = 0; i < srat_num_cpus; i++) +	for_each_possible_early_cpu(i)  		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);  	printk(KERN_INFO "Number of logical nodes in system = %d\n", diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a78b45f5fe2..c93420c9740 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)  	for(node=0; node < MAX_NUMNODES; node++)  		cpus_clear(node_to_cpu_mask[node]); -	for(cpu = 0; cpu < NR_CPUS; ++cpu) { +	for_each_possible_early_cpu(cpu) {  		node = -1;  		for (i = 0; i < NR_CPUS; ++i)  			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4aa9eaea76c..6206541f9e8 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -493,6 +493,8 @@ setup_arch (char **cmdline_p)  	acpi_table_init();  # ifdef CONFIG_ACPI_NUMA  	acpi_numa_init(); +	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? 
+		32 : cpus_weight(early_cpu_possible_map)), additional_cpus);  # endif  #else  # ifdef CONFIG_SMP diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 06c540a2946..6136a4c6df1 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)  {  	int cpu, n = 0; -	for (cpu = 0; cpu < NR_CPUS; cpu++) +	for_each_possible_early_cpu(cpu)  		if (node == node_cpuid[cpu].nid)  			n++; @@ -143,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)  #ifdef CONFIG_SMP  	int cpu; -	for (cpu = 0; cpu < NR_CPUS; cpu++) { +	for_each_possible_early_cpu(cpu) {  		if (node == node_cpuid[cpu].nid) {  			memcpy(__va(cpu_data), __phys_per_cpu_start,  			       __per_cpu_end - __per_cpu_start); @@ -346,7 +346,7 @@ static void __init initialize_pernode_data(void)  #ifdef CONFIG_SMP  	/* Set the node_data pointer for each per-cpu struct */ -	for (cpu = 0; cpu < NR_CPUS; cpu++) { +	for_each_possible_early_cpu(cpu) {  		node = node_cpuid[cpu].nid;  		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;  	} @@ -494,13 +494,9 @@ void __cpuinit *per_cpu_init(void)  	int cpu;  	static int first_time = 1; - -	if (smp_processor_id() != 0) -		return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; -  	if (first_time) {  		first_time = 0; -		for (cpu = 0; cpu < NR_CPUS; cpu++) +		for_each_possible_early_cpu(cpu)  			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];  	} diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 7807fc5c042..b73bf1838e5 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -27,7 +27,9 @@   */  int num_node_memblks;  struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; -struct node_cpuid_s node_cpuid[NR_CPUS]; +struct node_cpuid_s node_cpuid[NR_CPUS] = +	{ [0 ... 
NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; +  /*   * This is a matrix with "distances" between nodes, they should be   * proportional to the memory access latency ratios. diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index cd1cc39b559..fcfad326f4c 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -35,6 +35,7 @@  #include <linux/init.h>  #include <linux/numa.h>  #include <asm/system.h> +#include <asm/numa.h>  #define COMPILER_DEPENDENT_INT64	long  #define COMPILER_DEPENDENT_UINT64	unsigned long @@ -115,7 +116,11 @@ extern unsigned int is_cpu_cpei_target(unsigned int cpu);  extern void set_cpei_target_cpu(unsigned int cpu);  extern unsigned int get_cpei_target_cpu(void);  extern void prefill_possible_map(void); +#ifdef CONFIG_ACPI_HOTPLUG_CPU  extern int additional_cpus; +#else +#define additional_cpus 0 +#endif  #ifdef CONFIG_ACPI_NUMA  #if MAX_NUMNODES > 256 @@ -129,6 +134,34 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];  #define acpi_unlazy_tlb(x) +#ifdef CONFIG_ACPI_NUMA +extern cpumask_t early_cpu_possible_map; +#define for_each_possible_early_cpu(cpu)  \ +	for_each_cpu_mask((cpu), early_cpu_possible_map) + +static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) +{ +	int low_cpu, high_cpu; +	int cpu; +	int next_nid = 0; + +	low_cpu = cpus_weight(early_cpu_possible_map); + +	high_cpu = max(low_cpu, min_cpus); +	high_cpu = min(high_cpu + reserve_cpus, NR_CPUS); + +	for (cpu = low_cpu; cpu < high_cpu; cpu++) { +		cpu_set(cpu, early_cpu_possible_map); +		if (node_cpuid[cpu].nid == NUMA_NO_NODE) { +			node_cpuid[cpu].nid = next_nid; +			next_nid++; +			if (next_nid >= num_online_nodes()) +				next_nid = 0; +		} +	} +} +#endif /* CONFIG_ACPI_NUMA */ +  #endif /*__KERNEL__*/  #endif /*_ASM_ACPI_H*/ diff --git a/include/asm-ia64/numa.h b/include/asm-ia64/numa.h index 6a8a27cfae3..3499ff57bf4 100644 --- a/include/asm-ia64/numa.h +++ b/include/asm-ia64/numa.h @@ -22,6 +22,8 @@  #include 
<asm/mmzone.h> +#define NUMA_NO_NODE	-1 +  extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;  extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;  extern pg_data_t *pgdat_list[MAX_NUMNODES];  |