Diffstat (limited to 'arch/powerpc/mm')
 arch/powerpc/mm/fsl_booke_mmu.c |   7
 arch/powerpc/mm/numa.c          | 122
 arch/powerpc/mm/pgtable.c       |   1
 arch/powerpc/mm/tlb_hash32.c    |  15
 arch/powerpc/mm/tlb_nohash.c    | 129
 5 files changed, 196 insertions(+), 78 deletions(-)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index cdc7526e9c9..4b66a1ece6d 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -104,9 +104,10 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
 }

 /*
- * Set up one of the I/D BAT (block address translation) register pairs.
- * The parameters are not checked; in particular size must be a power
- * of 4 between 4k and 256M.
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus
+ * that support extended page sizes).  Note that while some cpus support a
+ * page size of 4G, we don't allow its use here.
  */
 static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 		unsigned long size, unsigned long flags, unsigned int pid)
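The size constraint the new comment documents is easy to sanity-check outside the kernel. A minimal standalone sketch; the helper name and the extended_sizes flag are illustrative, not part of the patch:

#include <stdbool.h>

#define SZ_4K	0x00001000UL
#define SZ_256M	0x10000000UL
#define SZ_1G	0x40000000UL

/* Hypothetical check of the settlbcam() precondition: size must be a
 * power of 4 between 4K and 256M (1G with extended page sizes).
 * settlbcam() itself deliberately does no checking. */
static bool tlbcam_size_valid(unsigned long size, bool extended_sizes)
{
	unsigned long max = extended_sizes ? SZ_1G : SZ_256M;

	if (size < SZ_4K || size > max)
		return false;
	if (size & (size - 1))		/* must be a power of two */
		return false;
	/* a power of 4 has its single set bit at an even position */
	return (size & 0x55555555UL) != 0;
}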
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index aa731af720c..002878ccf90 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data);

 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
+static int form1_affinity;
+
+#define MAX_DISTANCE_REF_POINTS 4
+static int distance_ref_points_depth;
+static const unsigned int *distance_ref_points;
+static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];

 /*
  * Allocate node_to_cpumask_map based on number of available nodes
@@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory)
 	return prop;
 }

+int __node_distance(int a, int b)
+{
+	int i;
+	int distance = LOCAL_DISTANCE;
+
+	if (!form1_affinity)
+		return distance;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
+			break;
+
+		/* Double the distance for each NUMA level */
+		distance *= 2;
+	}
+
+	return distance;
+}
+
+static void initialize_distance_lookup_table(int nid,
+		const unsigned int *associativity)
+{
+	int i;
+
+	if (!form1_affinity)
+		return;
+
+	for (i = 0; i < distance_ref_points_depth; i++) {
+		distance_lookup_table[nid][i] =
+			associativity[distance_ref_points[i]];
+	}
+}
+
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
  * info is found.
  */
@@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device)
 	/* POWER4 LPAR uses 0xffff as invalid node */
 	if (nid == 0xffff || nid >= MAX_NUMNODES)
 		nid = -1;
+
+	if (nid > 0 && tmp[0] >= distance_ref_points_depth)
+		initialize_distance_lookup_table(nid, tmp);
+
 out:
 	return nid;
 }
@@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device)
 }
 EXPORT_SYMBOL_GPL(of_node_to_nid);

-/*
- * In theory, the "ibm,associativity" property may contain multiple
- * associativity lists because a resource may be multiply connected
- * into the machine.  This resource then has different associativity
- * characteristics relative to its multiple connections.  We ignore
- * this for now.  We also assume that all cpu and memory sets have
- * their distances represented at a common level.  This won't be
- * true for hierarchical NUMA.
- *
- * In any case the ibm,associativity-reference-points should give
- * the correct depth for a normal NUMA system.
- *
- * - Dave Hansen <haveblue@us.ibm.com>
- */
 static int __init find_min_common_depth(void)
 {
-	int depth, index;
-	const unsigned int *ref_points;
+	int depth;
 	struct device_node *rtas_root;
-	unsigned int len;
 	struct device_node *chosen;
 	const char *vec5;

@@ -280,18 +307,28 @@ static int __init find_min_common_depth(void)
 		return -1;

 	/*
-	 * this property is 2 32-bit integers, each representing a level of
-	 * depth in the associativity nodes.  The first is for an SMP
-	 * configuration (should be all 0's) and the second is for a normal
-	 * NUMA configuration.
+	 * This property is a set of 32-bit integers, each representing
+	 * an index into the ibm,associativity nodes.
+	 *
+	 * With form 0 affinity the first integer is for an SMP configuration
+	 * (should be all 0's) and the second is for a normal NUMA
+	 * configuration. We have only one level of NUMA.
+	 *
+	 * With form 1 affinity the first integer is the most significant
+	 * NUMA boundary and the following are progressively less significant
+	 * boundaries. There can be more than one level of NUMA.
 	 */
-	index = 1;
-	ref_points = of_get_property(rtas_root,
-			"ibm,associativity-reference-points", &len);
+	distance_ref_points = of_get_property(rtas_root,
+					"ibm,associativity-reference-points",
+					&distance_ref_points_depth);
+
+	if (!distance_ref_points) {
+		dbg("NUMA: ibm,associativity-reference-points not found.\n");
+		goto err;
+	}
+
+	distance_ref_points_depth /= sizeof(int);

-	/*
-	 * For form 1 affinity information we want the first field
-	 */
 #define VEC5_AFFINITY_BYTE	5
 #define VEC5_AFFINITY		0x80
 	chosen = of_find_node_by_path("/chosen");
@@ -299,19 +336,38 @@ static int __init find_min_common_depth(void)
 		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
 		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
 			dbg("Using form 1 affinity\n");
-			index = 0;
+			form1_affinity = 1;
 		}
 	}

-	if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
-		depth = ref_points[index];
+	if (form1_affinity) {
+		depth = distance_ref_points[0];
 	} else {
-		dbg("NUMA: ibm,associativity-reference-points not found.\n");
-		depth = -1;
+		if (distance_ref_points_depth < 2) {
+			printk(KERN_WARNING "NUMA: "
+				"short ibm,associativity-reference-points\n");
+			goto err;
+		}
+
+		depth = distance_ref_points[1];
 	}

-	of_node_put(rtas_root);
+	/*
+	 * Warn and cap if the hardware supports more than
+	 * MAX_DISTANCE_REF_POINTS domains.
+	 */
+	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
+		printk(KERN_WARNING "NUMA: distance array capped at "
+			"%d entries\n", MAX_DISTANCE_REF_POINTS);
+		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
+	}
+
+	of_node_put(rtas_root);
 	return depth;
+
+err:
+	of_node_put(rtas_root);
+	return -1;
 }

 static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
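The effect of the distance-doubling loop in __node_distance() can be reproduced in a few lines of userspace C. The table contents below are invented; LOCAL_DISTANCE is 10, as in include/linux/topology.h:

#include <stdio.h>

#define LOCAL_DISTANCE		10
#define REF_POINTS_DEPTH	2

/* Made-up form 1 affinity domains for three nodes, most significant
 * NUMA boundary first, mirroring distance_lookup_table[][]. */
static const int lookup[3][REF_POINTS_DEPTH] = {
	{ 0, 5 },	/* node 0 */
	{ 1, 5 },	/* node 1: differs from node 0 at the top boundary */
	{ 2, 6 },	/* node 2: differs from node 0 at both boundaries */
};

static int node_distance(int a, int b)
{
	int i, distance = LOCAL_DISTANCE;

	/* Double the distance for each leading level at which a and b
	 * sit in different domains, exactly as __node_distance() does. */
	for (i = 0; i < REF_POINTS_DEPTH; i++) {
		if (lookup[a][i] == lookup[b][i])
			break;
		distance *= 2;
	}
	return distance;
}

int main(void)
{
	printf("%d %d %d\n", node_distance(0, 0),	/* 10: same node */
	       node_distance(0, 1),			/* 20: one boundary crossed */
	       node_distance(0, 2));			/* 40: two boundaries crossed */
	return 0;
}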
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index ebc2f38eb38..2c7e801ab20 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -92,7 +92,6 @@ static void pte_free_rcu_callback(struct rcu_head *head)

 static void pte_free_submit(struct pte_freelist_batch *batch)
 {
-	INIT_RCU_HEAD(&batch->rcu);
 	call_rcu(&batch->rcu, pte_free_rcu_callback);
 }

diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 8aaa8b7eb32..690566b66e8 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -89,17 +89,6 @@ void tlb_flush(struct mmu_gather *tlb)
  *    -- Cort
  */

-/*
- * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
- * the cache operations on the bus.  Hence we need to use an IPI
- * to get the other CPU(s) to invalidate their TLBs.
- */
-#ifdef CONFIG_SMP_750
-#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
-#else
-#define FINISH_FLUSH	do { } while (0)
-#endif
-
 static void flush_range(struct mm_struct *mm, unsigned long start,
 			unsigned long end)
 {
@@ -138,7 +127,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	flush_range(&init_mm, start, end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_kernel_range);

@@ -162,7 +150,6 @@ void flush_tlb_mm(struct mm_struct *mm)
 	 */
 	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
 		flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_mm);

@@ -179,7 +166,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
 	if (!pmd_none(*pmd))
 		flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_page);

@@ -192,6 +178,5 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
 	flush_range(vma->vm_mm, start, end);
-	FINISH_FLUSH;
 }
 EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index d8695b02a96..fe391e94252 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -46,6 +46,7 @@
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
+		.ind	= 20,
 		.enc	= BOOK3E_PAGESZ_4K,
 	},
 	[MMU_PAGE_16K] = {
@@ -54,6 +55,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	},
 	[MMU_PAGE_64K] = {
 		.shift	= 16,
+		.ind	= 28,
 		.enc	= BOOK3E_PAGESZ_64K,
 	},
 	[MMU_PAGE_1M] = {
@@ -62,6 +64,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	},
 	[MMU_PAGE_16M] = {
 		.shift	= 24,
+		.ind	= 36,
 		.enc	= BOOK3E_PAGESZ_16M,
 	},
 	[MMU_PAGE_256M] = {
@@ -344,16 +347,108 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
 	}
 }

-/*
- * Early initialization of the MMU TLB code
- */
-static void __early_init_mmu(int boot_cpu)
+static void setup_page_sizes(void)
+{
+	unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+	unsigned int tlb0ps = mfspr(SPRN_TLB0PS);
+	unsigned int eptcfg = mfspr(SPRN_EPTCFG);
+	int i, psize;
+
+	/* Look for supported direct sizes */
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+		if (tlb0ps & (1U << (def->shift - 10)))
+			def->flags |= MMU_PAGE_SIZE_DIRECT;
+	}
+
+	/* Indirect page sizes supported? */
+	if ((tlb0cfg & TLBnCFG_IND) == 0)
+		goto no_indirect;
+
+	/* Now, we only deal with one IND page size for each
+	 * direct size. Hopefully all implementations today are
+	 * unambiguous, but we might want to be careful in the
+	 * future.
+	 */
+	for (i = 0; i < 3; i++) {
+		unsigned int ps, sps;
+
+		sps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		ps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		if (!ps || !sps)
+			continue;
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+			if (ps == (def->shift - 10))
+				def->flags |= MMU_PAGE_SIZE_INDIRECT;
+			if (sps == (def->shift - 10))
+				def->ind = ps + 10;
+		}
+	}
+ no_indirect:
+
+	/* Cleanup array and print summary */
+	pr_info("MMU: Supported page sizes\n");
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+		const char *__page_type_names[] = {
+			"unsupported",
+			"direct",
+			"indirect",
+			"direct & indirect"
+		};
+		if (def->flags == 0) {
+			def->shift = 0;
+			continue;
+		}
+		pr_info("  %8ld KB as %s\n", 1ul << (def->shift - 10),
+			__page_type_names[def->flags & 0x3]);
+	}
+}
+
+static void setup_mmu_htw(void)
 {
 	extern unsigned int interrupt_base_book3e;
 	extern unsigned int exc_data_tlb_miss_htw_book3e;
 	extern unsigned int exc_instruction_tlb_miss_htw_book3e;

 	unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Check if HW tablewalk is present, and if yes, enable it by:
+	 *
+	 * - patching the TLB miss handlers to branch to the
+	 *   one dedicated to it
+	 *
+	 * - setting the global book3e_htw_enabled
+	 */
+	unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+
+	if ((tlb0cfg & TLBnCFG_IND) &&
+	    (tlb0cfg & TLBnCFG_PT)) {
+		/* Our exception vectors start with a NOP and -then- a branch
+		 * to deal with single stepping from userspace which stops on
+		 * the second instruction. Thus we need to patch the second
+		 * instruction of the exception, not the first one
+		 */
+		patch_branch(ibase + (0x1c0 / 4) + 1,
+			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
+		patch_branch(ibase + (0x1e0 / 4) + 1,
+			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+		book3e_htw_enabled = 1;
+	}
+	pr_info("MMU: Book3E Page Tables %s\n",
+		book3e_htw_enabled ? "Enabled" : "Disabled");
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void __early_init_mmu(int boot_cpu)
+{
 	unsigned int mas4;

 	/* XXX This will have to be decided at runtime, but right
@@ -370,35 +465,17 @@ static void __early_init_mmu(int boot_cpu)
 	 */
 	mmu_vmemmap_psize = MMU_PAGE_16M;

-	/* Check if HW tablewalk is present, and if yes, enable it by:
-	 *
-	 * - patching the TLB miss handlers to branch to the
-	 *   one dedicates to it
-	 *
-	 * - setting the global book3e_htw_enabled
-	 *
-	 * - Set MAS4:INDD and default page size
-	 */
-
 	/* XXX This code only checks for TLB 0 capabilities and doesn't
 	 *     check what page size combos are supported by the HW. It
 	 *     also doesn't handle the case where a separate array holds
 	 *     the IND entries from the array loaded by the PT.
 	 */
 	if (boot_cpu) {
-		unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+		/* Look for supported page sizes */
+		setup_page_sizes();

-		/* Check if HW loader is supported */
-		if ((tlb0cfg & TLBnCFG_IND) &&
-		    (tlb0cfg & TLBnCFG_PT)) {
-			patch_branch(ibase + (0x1c0 / 4),
-			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
-			patch_branch(ibase + (0x1e0 / 4),
-			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
-			book3e_htw_enabled = 1;
-		}
-		pr_info("MMU: Book3E Page Tables %s\n",
-			book3e_htw_enabled ? "Enabled" : "Disabled");
+		/* Look for HW tablewalk support */
+		setup_mmu_htw();
 	}

 	/* Set MAS4 based on page table setting */
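For the EPTCFG decode in setup_page_sizes() above: the register packs up to three {SPS, PS} pairs low-to-high, five bits each, where a value encodes log2(page size) - 10. A standalone sketch of that unpacking with an invented register value (PS = 10, i.e. 1M indirect entries, backed by SPS = 2, i.e. 4K sub-pages, which lines up with the .ind = 20 the patch adds for MMU_PAGE_4K):

#include <stdio.h>

int main(void)
{
	/* Invented EPTCFG value: one valid {SPS = 2, PS = 10} pair,
	 * the other two pairs zero and therefore skipped. */
	unsigned int eptcfg = (10u << 5) | 2u;
	int i;

	for (i = 0; i < 3; i++) {
		unsigned int sps = eptcfg & 0x1f;	/* sub-page size */
		unsigned int ps;

		eptcfg >>= 5;
		ps = eptcfg & 0x1f;			/* indirect page size */
		eptcfg >>= 5;
		if (!ps || !sps)
			continue;
		/* prints: indirect entry: 1024 KB region, 4 KB sub-pages */
		printf("indirect entry: %u KB region, %u KB sub-pages\n",
		       1u << ps, 1u << sps);
	}
	return 0;
}

As a side note, the summary loop in setup_page_sizes() indexes the four __page_type_names entries with flags & 0x3, which implies MMU_PAGE_SIZE_DIRECT is bit 0 and MMU_PAGE_SIZE_INDIRECT is bit 1.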
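And one note on the "+ 1" in the patch_branch() calls: as the new comment in setup_mmu_htw() explains, each exception vector opens with a nop so that single-stepping from userspace (which stops on the second instruction) keeps working, so the live branch is the second word. A trivial sketch of the word arithmetic; only the 0x1c0/0x1e0 offsets come from the patch:

#include <stdio.h>

int main(void)
{
	/* interrupt_base_book3e is treated as an array of 32-bit
	 * instruction words, so byte offset 0x1c0 is word 0x70; the
	 * word being patched is the one after the leading nop. */
	printf("data TLB miss: patch word %#x\n", 0x1c0 / 4 + 1);	/* 0x71 */
	printf("insn TLB miss: patch word %#x\n", 0x1e0 / 4 + 1);	/* 0x79 */
	return 0;
}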