Diffstat (limited to 'arch/arm/mm')
 -rw-r--r--   arch/arm/mm/cache-aurora-l2.h  |  55
 -rw-r--r--   arch/arm/mm/cache-l2x0.c       | 278
 -rw-r--r--   arch/arm/mm/context.c          | 207
 -rw-r--r--   arch/arm/mm/idmap.c            |  14
 -rw-r--r--   arch/arm/mm/ioremap.c          |  16
 -rw-r--r--   arch/arm/mm/mmap.c             | 134
 -rw-r--r--   arch/arm/mm/mmu.c              |  18
 -rw-r--r--   arch/arm/mm/proc-macros.S      |   4
 -rw-r--r--   arch/arm/mm/proc-v6.S          |   2
 -rw-r--r--   arch/arm/mm/proc-v7-2level.S   |  10
 -rw-r--r--   arch/arm/mm/proc-v7-3level.S   |   5
 -rw-r--r--   arch/arm/mm/proc-v7.S          |   2
12 files changed, 489 insertions, 256 deletions
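
The bulk of the churn in the diff below is the context.c rewrite, which replaces the global cpu_last_asid counter with a generation-tagged bitmap allocator. As a rough, single-CPU illustration of that idea only -- invented names (alloc_context_id, asid_in_use), no per-CPU active/reserved ASID tracking and no deferred TLB flush, which the real code does handle -- the allocation path looks something like this:

/*
 * Simplified sketch of a generation + bitmap ASID allocator, loosely
 * following the scheme context.c adopts below.  All names here are
 * illustrative, not the kernel's.
 */
#include <stdint.h>
#include <string.h>

#define ASID_BITS	8
#define ASID_MASK	(~0ULL << ASID_BITS)
#define ASID_FIRST_VER	(1ULL << ASID_BITS)
#define NUM_USER_ASIDS	(ASID_FIRST_VER - 1)

static uint64_t generation = ASID_FIRST_VER;
static unsigned char asid_in_use[NUM_USER_ASIDS];

/* Return a context ID = generation | ASID for a task's mm. */
static uint64_t alloc_context_id(uint64_t old_id)
{
	uint64_t idx;

	/* Fast path: the old ASID still belongs to the live generation. */
	if (old_id && !((old_id ^ generation) >> ASID_BITS))
		return old_id;

	/*
	 * Find a free slot; on exhaustion bump the generation and restart
	 * with an empty map (the real code also keeps ASIDs that are live
	 * on other CPUs and queues a TLB flush at this point).
	 */
	for (idx = 0; idx < NUM_USER_ASIDS; idx++)
		if (!asid_in_use[idx])
			break;
	if (idx == NUM_USER_ASIDS) {
		generation += ASID_FIRST_VER;
		memset(asid_in_use, 0, sizeof(asid_in_use));
		idx = 0;
	}
	asid_in_use[idx] = 1;

	/* ASID 0 is reserved, so slot n maps to hardware ASID n + 1. */
	return generation | ((idx + 1) & ~ASID_MASK);
}

With this scheme a rollover only ever happens under the allocator lock, and the common context switch reduces to the generation comparison on the fast path.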
diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h new file mode 100644 index 00000000000..c8612476983 --- /dev/null +++ b/arch/arm/mm/cache-aurora-l2.h @@ -0,0 +1,55 @@ +/* + * AURORA shared L2 cache controller support + * + * Copyright (C) 2012 Marvell + * + * Yehuda Yitschak <yehuday@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2.  This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H +#define __ASM_ARM_HARDWARE_AURORA_L2_H + +#define AURORA_SYNC_REG		    0x700 +#define AURORA_RANGE_BASE_ADDR_REG  0x720 +#define AURORA_FLUSH_PHY_ADDR_REG   0x7f0 +#define AURORA_INVAL_RANGE_REG	    0x774 +#define AURORA_CLEAN_RANGE_REG	    0x7b4 +#define AURORA_FLUSH_RANGE_REG	    0x7f4 + +#define AURORA_ACR_REPLACEMENT_OFFSET	    27 +#define AURORA_ACR_REPLACEMENT_MASK	     \ +	(0x3 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR    \ +	(0 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_LFSR     \ +	(1 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \ +	(3 << AURORA_ACR_REPLACEMENT_OFFSET) + +#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET	0 +#define AURORA_ACR_FORCE_WRITE_POLICY_MASK	\ +	(0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_POLICY_DIS	\ +	(0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_BACK_POLICY	\ +	(1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_THRO_POLICY	\ +	(2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) + +#define MAX_RANGE_SIZE		1024 + +#define AURORA_WAY_SIZE_SHIFT	2 + +#define AURORA_CTRL_FW		0x100 + +/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make + * the distinction between a number coming from hardware and a number + * coming from the device tree */ +#define AURORA_CACHE_ID	       0x100 + +#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */ diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 8a97e6443c6..6911b8b2745 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -25,6 +25,7 @@  #include <asm/cacheflush.h>  #include <asm/hardware/cache-l2x0.h> +#include "cache-aurora-l2.h"  #define CACHE_LINE_SIZE		32 @@ -34,14 +35,20 @@ static u32 l2x0_way_mask;	/* Bitmask of active ways */  static u32 l2x0_size;  static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32  cache_id_part_number_from_dt; +  struct l2x0_regs l2x0_saved_regs;  struct l2x0_of_data {  	void (*setup)(const struct device_node *, u32 *, u32 *);  	void (*save)(void); -	void (*resume)(void); +	struct outer_cache_fns outer_cache;  }; +static bool of_init = false; +  static inline void cache_wait_way(void __iomem *reg, unsigned long mask)  {  	/* wait for cache operation by line or way to complete */ @@ -168,7 +175,7 @@ static void l2x0_inv_all(void)  	/* invalidate all ways */  	raw_spin_lock_irqsave(&l2x0_lock, flags);  	/* Invalidating when L2 is enabled is a nono */ -	BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); +	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN);  	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);  	cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);  	cache_sync(); @@ -292,11 +299,18 @@ static void l2x0_unlock(u32 cache_id)  	int 
lockregs;  	int i; -	if (cache_id == L2X0_CACHE_ID_PART_L310) +	switch (cache_id) { +	case L2X0_CACHE_ID_PART_L310:  		lockregs = 8; -	else +		break; +	case AURORA_CACHE_ID: +		lockregs = 4; +		break; +	default:  		/* L210 and unknown types */  		lockregs = 1; +		break; +	}  	for (i = 0; i < lockregs; i++) {  		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + @@ -312,18 +326,22 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  	u32 cache_id;  	u32 way_size = 0;  	int ways; +	int way_size_shift = L2X0_WAY_SIZE_SHIFT;  	const char *type;  	l2x0_base = base; - -	cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); +	if (cache_id_part_number_from_dt) +		cache_id = cache_id_part_number_from_dt; +	else +		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) +			& L2X0_CACHE_ID_PART_MASK;  	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);  	aux &= aux_mask;  	aux |= aux_val;  	/* Determine the number of ways */ -	switch (cache_id & L2X0_CACHE_ID_PART_MASK) { +	switch (cache_id) {  	case L2X0_CACHE_ID_PART_L310:  		if (aux & (1 << 16))  			ways = 16; @@ -340,6 +358,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  		ways = (aux >> 13) & 0xf;  		type = "L210";  		break; + +	case AURORA_CACHE_ID: +		sync_reg_offset = AURORA_SYNC_REG; +		ways = (aux >> 13) & 0xf; +		ways = 2 << ((ways + 1) >> 2); +		way_size_shift = AURORA_WAY_SIZE_SHIFT; +		type = "Aurora"; +		break;  	default:  		/* Assume unknown chips have 8 ways */  		ways = 8; @@ -353,7 +379,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  	 * L2 cache Size =  Way size * Number of ways  	 */  	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; -	way_size = 1 << (way_size + 3); +	way_size = 1 << (way_size + way_size_shift); +  	l2x0_size = ways * way_size * SZ_1K;  	/* @@ -361,7 +388,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  	 * If you are booting from non-secure mode  	 * accessing the below registers will fault.  	 */ -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {  		/* Make sure that I&D is not locked down when starting */  		l2x0_unlock(cache_id); @@ -371,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  		l2x0_inv_all();  		/* enable L2X0 */ -		writel_relaxed(1, l2x0_base + L2X0_CTRL); +		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);  	}  	/* Re-read it in case some bits are reserved. */ @@ -380,13 +407,15 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  	/* Save the value for resuming. 
*/  	l2x0_saved_regs.aux_ctrl = aux; -	outer_cache.inv_range = l2x0_inv_range; -	outer_cache.clean_range = l2x0_clean_range; -	outer_cache.flush_range = l2x0_flush_range; -	outer_cache.sync = l2x0_cache_sync; -	outer_cache.flush_all = l2x0_flush_all; -	outer_cache.inv_all = l2x0_inv_all; -	outer_cache.disable = l2x0_disable; +	if (!of_init) { +		outer_cache.inv_range = l2x0_inv_range; +		outer_cache.clean_range = l2x0_clean_range; +		outer_cache.flush_range = l2x0_flush_range; +		outer_cache.sync = l2x0_cache_sync; +		outer_cache.flush_all = l2x0_flush_all; +		outer_cache.inv_all = l2x0_inv_all; +		outer_cache.disable = l2x0_disable; +	}  	printk(KERN_INFO "%s cache controller enabled\n", type);  	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", @@ -394,6 +423,100 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)  }  #ifdef CONFIG_OF +static int l2_wt_override; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ +	/* +	 * Limit the number of cache lines processed at once, +	 * since cache range operations stall the CPU pipeline +	 * until completion. +	 */ +	if (end > start + MAX_RANGE_SIZE) +		end = start + MAX_RANGE_SIZE; + +	/* +	 * Cache range operations can't straddle a page boundary. +	 */ +	if (end > PAGE_ALIGN(start+1)) +		end = PAGE_ALIGN(start+1); + +	return end; +} + +/* + * Make sure 'start' and 'end' reference the same page, as L2 is PIPT + * and range operations only do a TLB lookup on the start address. + */ +static void aurora_pa_range(unsigned long start, unsigned long end, +			unsigned long offset) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&l2x0_lock, flags); +	writel(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); +	writel(end, l2x0_base + offset); +	raw_spin_unlock_irqrestore(&l2x0_lock, flags); + +	cache_sync(); +} + +static void aurora_inv_range(unsigned long start, unsigned long end) +{ +	/* +	 * round start and end adresses up to cache line size +	 */ +	start &= ~(CACHE_LINE_SIZE - 1); +	end = ALIGN(end, CACHE_LINE_SIZE); + +	/* +	 * Invalidate all full cache lines between 'start' and 'end'. +	 */ +	while (start < end) { +		unsigned long range_end = calc_range_end(start, end); +		aurora_pa_range(start, range_end - CACHE_LINE_SIZE, +				AURORA_INVAL_RANGE_REG); +		start = range_end; +	} +} + +static void aurora_clean_range(unsigned long start, unsigned long end) +{ +	/* +	 * If L2 is forced to WT, the L2 will always be clean and we +	 * don't need to do anything here. 
+	 */ +	if (!l2_wt_override) { +		start &= ~(CACHE_LINE_SIZE - 1); +		end = ALIGN(end, CACHE_LINE_SIZE); +		while (start != end) { +			unsigned long range_end = calc_range_end(start, end); +			aurora_pa_range(start, range_end - CACHE_LINE_SIZE, +					AURORA_CLEAN_RANGE_REG); +			start = range_end; +		} +	} +} + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ +	if (!l2_wt_override) { +		start &= ~(CACHE_LINE_SIZE - 1); +		end = ALIGN(end, CACHE_LINE_SIZE); +		while (start != end) { +			unsigned long range_end = calc_range_end(start, end); +			aurora_pa_range(start, range_end - CACHE_LINE_SIZE, +					AURORA_FLUSH_RANGE_REG); +			start = range_end; +		} +	} +} +  static void __init l2x0_of_setup(const struct device_node *np,  				 u32 *aux_val, u32 *aux_mask)  { @@ -491,9 +614,15 @@ static void __init pl310_save(void)  	}  } +static void aurora_save(void) +{ +	l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); +	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} +  static void l2x0_resume(void)  { -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {  		/* restore aux ctrl and enable l2 */  		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); @@ -502,7 +631,7 @@ static void l2x0_resume(void)  		l2x0_inv_all(); -		writel_relaxed(1, l2x0_base + L2X0_CTRL); +		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL);  	}  } @@ -510,7 +639,7 @@ static void pl310_resume(void)  {  	u32 l2x0_revision; -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {  		/* restore pl310 setup */  		writel_relaxed(l2x0_saved_regs.tag_latency,  			l2x0_base + L2X0_TAG_LATENCY_CTRL); @@ -536,22 +665,108 @@ static void pl310_resume(void)  	l2x0_resume();  } +static void aurora_resume(void) +{ +	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { +		writel(l2x0_saved_regs.aux_ctrl, l2x0_base + L2X0_AUX_CTRL); +		writel(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); +	} +} + +static void __init aurora_broadcast_l2_commands(void) +{ +	__u32 u; +	/* Enable Broadcasting of cache commands to L2*/ +	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); +	u |= AURORA_CTRL_FW;		/* Set the FW bit */ +	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); +	isb(); +} + +static void __init aurora_of_setup(const struct device_node *np, +				u32 *aux_val, u32 *aux_mask) +{ +	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; +	u32 mask =  AURORA_ACR_REPLACEMENT_MASK; + +	of_property_read_u32(np, "cache-id-part", +			&cache_id_part_number_from_dt); + +	/* Determine and save the write policy */ +	l2_wt_override = of_property_read_bool(np, "wt-override"); + +	if (l2_wt_override) { +		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; +		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; +	} + +	*aux_val &= ~mask; +	*aux_val |= val; +	*aux_mask &= ~mask; +} +  static const struct l2x0_of_data pl310_data = { -	pl310_of_setup, -	pl310_save, -	pl310_resume, +	.setup = pl310_of_setup, +	.save  = pl310_save, +	.outer_cache = { +		.resume      = pl310_resume, +		.inv_range   = l2x0_inv_range, +		.clean_range = l2x0_clean_range, +		.flush_range = l2x0_flush_range, +		.sync        = l2x0_cache_sync, +		.flush_all   = l2x0_flush_all, +		.inv_all     = l2x0_inv_all, +		.disable     = l2x0_disable, +		.set_debug   = pl310_set_debug, +	},  };  static const struct l2x0_of_data l2x0_data = { -	l2x0_of_setup, -	NULL, -	l2x0_resume, +	.setup = l2x0_of_setup, +	.save  
= NULL, +	.outer_cache = { +		.resume      = l2x0_resume, +		.inv_range   = l2x0_inv_range, +		.clean_range = l2x0_clean_range, +		.flush_range = l2x0_flush_range, +		.sync        = l2x0_cache_sync, +		.flush_all   = l2x0_flush_all, +		.inv_all     = l2x0_inv_all, +		.disable     = l2x0_disable, +	}, +}; + +static const struct l2x0_of_data aurora_with_outer_data = { +	.setup = aurora_of_setup, +	.save  = aurora_save, +	.outer_cache = { +		.resume      = aurora_resume, +		.inv_range   = aurora_inv_range, +		.clean_range = aurora_clean_range, +		.flush_range = aurora_flush_range, +		.sync        = l2x0_cache_sync, +		.flush_all   = l2x0_flush_all, +		.inv_all     = l2x0_inv_all, +		.disable     = l2x0_disable, +	}, +}; + +static const struct l2x0_of_data aurora_no_outer_data = { +	.setup = aurora_of_setup, +	.save  = aurora_save, +	.outer_cache = { +		.resume      = aurora_resume, +	},  };  static const struct of_device_id l2x0_ids[] __initconst = {  	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },  	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },  	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, +	{ .compatible = "marvell,aurora-system-cache", +	  .data = (void *)&aurora_no_outer_data}, +	{ .compatible = "marvell,aurora-outer-cache", +	  .data = (void *)&aurora_with_outer_data},  	{}  }; @@ -577,17 +792,24 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)  	data = of_match_node(l2x0_ids, np)->data;  	/* L2 configuration can only be changed if the cache is disabled */ -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {  		if (data->setup)  			data->setup(np, &aux_val, &aux_mask); + +		/* For aurora cache in no outer mode select the +		 * correct mode using the coprocessor*/ +		if (data == &aurora_no_outer_data) +			aurora_broadcast_l2_commands();  	}  	if (data->save)  		data->save(); +	of_init = true;  	l2x0_init(l2x0_base, aux_val, aux_mask); -	outer_cache.resume = data->resume; +	memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); +  	return 0;  }  #endif diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 4e07eec1270..bc4a5e9ebb7 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -2,6 +2,9 @@   *  linux/arch/arm/mm/context.c   *   *  Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + *  Copyright (C) 2012 ARM Limited + * + *  Author: Will Deacon <will.deacon@arm.com>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -14,14 +17,40 @@  #include <linux/percpu.h>  #include <asm/mmu_context.h> +#include <asm/smp_plat.h>  #include <asm/thread_notify.h>  #include <asm/tlbflush.h> +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31                         7          0 + * +-------------------------+-----------+ + * |      process ID         |   ASID    | + * +-------------------------+-----------+ + * |              context ID             | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. 
+ */ +#define ASID_FIRST_VERSION	(1ULL << ASID_BITS) +#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1) + +#define ASID_TO_IDX(asid)	((asid & ~ASID_MASK) - 1) +#define IDX_TO_ASID(idx)	((idx + 1) & ~ASID_MASK) +  static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending;  #ifdef CONFIG_ARM_LPAE -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void)  {  	unsigned long ttbl = __pa(swapper_pg_dir);  	unsigned long ttbh = 0; @@ -37,7 +66,7 @@ void cpu_set_reserved_ttbr0(void)  	isb();  }  #else -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void)  {  	u32 ttb;  	/* Copy TTBR1 into TTBR0 */ @@ -84,124 +113,104 @@ static int __init contextidr_notifier_init(void)  arch_initcall(contextidr_notifier_init);  #endif -/* - * We fork()ed a process, and we need a new context for the child - * to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +static void flush_context(unsigned int cpu)  { -	mm->context.id = 0; -	raw_spin_lock_init(&mm->context.id_lock); -} +	int i; +	u64 asid; -static void flush_context(void) -{ -	cpu_set_reserved_ttbr0(); -	local_flush_tlb_all(); -	if (icache_is_vivt_asid_tagged()) { -		__flush_icache_all(); -		dsb(); +	/* Update the list of reserved ASIDs and the ASID bitmap. */ +	bitmap_clear(asid_map, 0, NUM_USER_ASIDS); +	for_each_possible_cpu(i) { +		if (i == cpu) { +			asid = 0; +		} else { +			asid = atomic64_xchg(&per_cpu(active_asids, i), 0); +			__set_bit(ASID_TO_IDX(asid), asid_map); +		} +		per_cpu(reserved_asids, i) = asid;  	} + +	/* Queue a TLB invalidate and flush the I-cache if necessary. */ +	if (!tlb_ops_need_broadcast()) +		cpumask_set_cpu(cpu, &tlb_flush_pending); +	else +		cpumask_setall(&tlb_flush_pending); + +	if (icache_is_vivt_asid_tagged()) +		__flush_icache_all();  } -#ifdef CONFIG_SMP +static int is_reserved_asid(u64 asid) +{ +	int cpu; +	for_each_possible_cpu(cpu) +		if (per_cpu(reserved_asids, cpu) == asid) +			return 1; +	return 0; +} -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static void new_context(struct mm_struct *mm, unsigned int cpu)  { -	unsigned long flags; +	u64 asid = mm->context.id; +	u64 generation = atomic64_read(&asid_generation); -	/* -	 * Locking needed for multi-threaded applications where the -	 * same mm->context.id could be set from different CPUs during -	 * the broadcast. This function is also called via IPI so the -	 * mm->context.id_lock has to be IRQ-safe. -	 */ -	raw_spin_lock_irqsave(&mm->context.id_lock, flags); -	if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { +	if (asid != 0 && is_reserved_asid(asid)) { +		/* +		 * Our current ASID was active during a rollover, we can +		 * continue to use it and this was just a false alarm. +		 */ +		asid = generation | (asid & ~ASID_MASK); +	} else {  		/* -		 * Old version of ASID found. Set the new one and -		 * reset mm_cpumask(mm). +		 * Allocate a free ASID. If we can't find one, take a +		 * note of the currently active ASIDs and mark the TLBs +		 * as requiring flushes.  		 
*/ -		mm->context.id = asid; +		asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); +		if (asid == NUM_USER_ASIDS) { +			generation = atomic64_add_return(ASID_FIRST_VERSION, +							 &asid_generation); +			flush_context(cpu); +			asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); +		} +		__set_bit(asid, asid_map); +		asid = generation | IDX_TO_ASID(asid);  		cpumask_clear(mm_cpumask(mm));  	} -	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); -	/* -	 * Set the mm_cpumask(mm) bit for the current CPU. -	 */ -	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); +	mm->context.id = asid;  } -/* - * Reset the ASID on the current CPU. This function call is broadcast - * from the CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)  { -	unsigned int asid; +	unsigned long flags;  	unsigned int cpu = smp_processor_id(); -	struct mm_struct *mm = current->active_mm; -	smp_rmb(); -	asid = cpu_last_asid + cpu + 1; +	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) +		__check_vmalloc_seq(mm); -	flush_context(); -	set_mm_context(mm, asid); - -	/* set the new ASID */ -	cpu_switch_mm(mm->pgd, mm); -} +	/* +	 * Required during context switch to avoid speculative page table +	 * walking with the wrong TTBR. +	 */ +	cpu_set_reserved_ttbr0(); -#else +	if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) +	    && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) +		goto switch_mm_fastpath; -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ -	mm->context.id = asid; -	cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} +	raw_spin_lock_irqsave(&cpu_asid_lock, flags); +	/* Check that our ASID belongs to the current generation. */ +	if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) +		new_context(mm, cpu); -#endif +	atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); +	cpumask_set_cpu(cpu, mm_cpumask(mm)); -void __new_context(struct mm_struct *mm) -{ -	unsigned int asid; +	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) +		local_flush_tlb_all(); +	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); -	raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP -	/* -	 * Check the ASID again, in case the change was broadcast from -	 * another CPU before we acquired the lock. -	 */ -	if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { -		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -		raw_spin_unlock(&cpu_asid_lock); -		return; -	} -#endif -	/* -	 * At this point, it is guaranteed that the current mm (with -	 * an old ASID) isn't active on any other CPU since the ASIDs -	 * are changed simultaneously via IPI. -	 */ -	asid = ++cpu_last_asid; -	if (asid == 0) -		asid = cpu_last_asid = ASID_FIRST_VERSION; - -	/* -	 * If we've used up all our ASIDs, we need -	 * to start a new version and flush the TLB. 
-	 */ -	if (unlikely((asid & ~ASID_MASK) == 0)) { -		asid = cpu_last_asid + smp_processor_id() + 1; -		flush_context(); -#ifdef CONFIG_SMP -		smp_wmb(); -		smp_call_function(reset_context, NULL, 1); -#endif -		cpu_last_asid += NR_CPUS; -	} - -	set_mm_context(mm, asid); -	raw_spin_unlock(&cpu_asid_lock); +switch_mm_fastpath: +	cpu_switch_mm(mm->pgd, mm);  } diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index ab88ed4f8e0..99db769307e 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -92,6 +92,9 @@ static int __init init_static_idmap(void)  		(long long)idmap_start, (long long)idmap_end);  	identity_mapping_add(idmap_pgd, idmap_start, idmap_end); +	/* Flush L1 for the hardware to see this page table content */ +	flush_cache_louis(); +  	return 0;  }  early_initcall(init_static_idmap); @@ -103,12 +106,15 @@ early_initcall(init_static_idmap);   */  void setup_mm_for_reboot(void)  { -	/* Clean and invalidate L1. */ -	flush_cache_all(); -  	/* Switch to the identity mapping. */  	cpu_switch_mm(idmap_pgd, &init_mm); -	/* Flush the TLB. */ +#ifdef CONFIG_CPU_HAS_ASID +	/* +	 * We don't have a clean ASID for the identity mapping, which +	 * may clash with virtual addresses of the previous page tables +	 * and therefore potentially in the TLB. +	 */  	local_flush_tlb_all(); +#endif  } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 5dcc2fd46c4..88fd86cf3d9 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys,  }  EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm)  {  	unsigned int seq;  	do { -		seq = init_mm.context.kvm_seq; +		seq = init_mm.context.vmalloc_seq;  		memcpy(pgd_offset(mm, VMALLOC_START),  		       pgd_offset_k(VMALLOC_START),  		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -  					pgd_index(VMALLOC_START))); -		mm->context.kvm_seq = seq; -	} while (seq != init_mm.context.kvm_seq); +		mm->context.vmalloc_seq = seq; +	} while (seq != init_mm.context.vmalloc_seq);  }  #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) @@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)  		if (!pmd_none(pmd)) {  			/*  			 * Clear the PMD from the page table, and -			 * increment the kvm sequence so others +			 * increment the vmalloc sequence so others  			 * notice this change.  			 *  			 * Note: this is still racy on SMP machines.  			 */  			pmd_clear(pmdp); -			init_mm.context.kvm_seq++; +			init_mm.context.vmalloc_seq++;  			/*  			 * Free the page table, if there was one. @@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)  	 * Ensure that the active_mm is up to date - we want to  	 * catch any use-after-iounmap cases.  	 
*/ -	if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) -		__check_kvm_seq(current->active_mm); +	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) +		__check_vmalloc_seq(current->active_mm);  	flush_tlb_kernel_range(virt, end);  } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index ce8cb1970d7..10062ceadd1 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -11,18 +11,6 @@  #include <linux/random.h>  #include <asm/cachetype.h> -static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, -					      unsigned long pgoff) -{ -	unsigned long base = addr & ~(SHMLBA-1); -	unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1); - -	if (base + off <= addr) -		return base + off; - -	return base - off; -} -  #define COLOUR_ALIGN(addr,pgoff)		\  	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\  	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) @@ -69,9 +57,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,  {  	struct mm_struct *mm = current->mm;  	struct vm_area_struct *vma; -	unsigned long start_addr;  	int do_align = 0;  	int aliasing = cache_is_vipt_aliasing(); +	struct vm_unmapped_area_info info;  	/*  	 * We only need to do colour alignment if either the I or D @@ -104,46 +92,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,  		    (!vma || addr + len <= vma->vm_start))  			return addr;  	} -	if (len > mm->cached_hole_size) { -	        start_addr = addr = mm->free_area_cache; -	} else { -	        start_addr = addr = mm->mmap_base; -	        mm->cached_hole_size = 0; -	} -full_search: -	if (do_align) -		addr = COLOUR_ALIGN(addr, pgoff); -	else -		addr = PAGE_ALIGN(addr); - -	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { -		/* At this point:  (!vma || addr < vma->vm_end). */ -		if (TASK_SIZE - len < addr) { -			/* -			 * Start a new search - just in case we missed -			 * some holes. -			 */ -			if (start_addr != TASK_UNMAPPED_BASE) { -				start_addr = addr = TASK_UNMAPPED_BASE; -				mm->cached_hole_size = 0; -				goto full_search; -			} -			return -ENOMEM; -		} -		if (!vma || addr + len <= vma->vm_start) { -			/* -			 * Remember the place where we stopped the search: -			 */ -			mm->free_area_cache = addr + len; -			return addr; -		} -		if (addr + mm->cached_hole_size < vma->vm_start) -		        mm->cached_hole_size = vma->vm_start - addr; -		addr = vma->vm_end; -		if (do_align) -			addr = COLOUR_ALIGN(addr, pgoff); -	} +	info.flags = 0; +	info.length = len; +	info.low_limit = mm->mmap_base; +	info.high_limit = TASK_SIZE; +	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; +	info.align_offset = pgoff << PAGE_SHIFT; +	return vm_unmapped_area(&info);  }  unsigned long @@ -156,6 +112,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,  	unsigned long addr = addr0;  	int do_align = 0;  	int aliasing = cache_is_vipt_aliasing(); +	struct vm_unmapped_area_info info;  	/*  	 * We only need to do colour alignment if either the I or D @@ -187,70 +144,27 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,  			return addr;  	} -	/* check if free_area_cache is useful for us */ -	if (len <= mm->cached_hole_size) { -		mm->cached_hole_size = 0; -		mm->free_area_cache = mm->mmap_base; -	} +	info.flags = VM_UNMAPPED_AREA_TOPDOWN; +	info.length = len; +	info.low_limit = PAGE_SIZE; +	info.high_limit = mm->mmap_base; +	info.align_mask = do_align ? 
(PAGE_MASK & (SHMLBA - 1)) : 0; +	info.align_offset = pgoff << PAGE_SHIFT; +	addr = vm_unmapped_area(&info); -	/* either no address requested or can't fit in requested address hole */ -	addr = mm->free_area_cache; -	if (do_align) { -		unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff); -		addr = base + len; -	} - -	/* make sure it can fit in the remaining address space */ -	if (addr > len) { -		vma = find_vma(mm, addr-len); -		if (!vma || addr <= vma->vm_start) -			/* remember the address as a hint for next time */ -			return (mm->free_area_cache = addr-len); -	} - -	if (mm->mmap_base < len) -		goto bottomup; - -	addr = mm->mmap_base - len; -	if (do_align) -		addr = COLOUR_ALIGN_DOWN(addr, pgoff); - -	do { -		/* -		 * Lookup failure means no vma is above this address, -		 * else if new region fits below vma->vm_start, -		 * return with success: -		 */ -		vma = find_vma(mm, addr); -		if (!vma || addr+len <= vma->vm_start) -			/* remember the address as a hint for next time */ -			return (mm->free_area_cache = addr); - -		/* remember the largest hole we saw so far */ -		if (addr + mm->cached_hole_size < vma->vm_start) -			mm->cached_hole_size = vma->vm_start - addr; - -		/* try just below the current vma->vm_start */ -		addr = vma->vm_start - len; -		if (do_align) -			addr = COLOUR_ALIGN_DOWN(addr, pgoff); -	} while (len < vma->vm_start); - -bottomup:  	/*  	 * A failed mmap() very likely causes application failure,  	 * so fall back to the bottom-up function here. This scenario  	 * can happen with large stack limits and large mmap()  	 * allocations.  	 */ -	mm->cached_hole_size = ~0UL; -	mm->free_area_cache = TASK_UNMAPPED_BASE; -	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); -	/* -	 * Restore the topdown base: -	 */ -	mm->free_area_cache = mm->mmap_base; -	mm->cached_hole_size = ~0UL; +	if (addr & ~PAGE_MASK) { +		VM_BUG_ON(addr != -ENOMEM); +		info.flags = 0; +		info.low_limit = mm->mmap_base; +		info.high_limit = TASK_SIZE; +		addr = vm_unmapped_area(&info); +	}  	return addr;  } @@ -279,7 +193,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)   * You really shouldn't be using read() or write() on /dev/mem.  This   * might go away in the future.   
*/ -int valid_phys_addr_range(unsigned long addr, size_t size) +int valid_phys_addr_range(phys_addr_t addr, size_t size)  {  	if (addr < PHYS_OFFSET)  		return 0; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 941dfb9e9a7..9f0610243bd 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -488,7 +488,7 @@ static void __init build_mem_type_table(void)  #endif  	for (i = 0; i < 16; i++) { -		unsigned long v = pgprot_val(protection_map[i]); +		pteval_t v = pgprot_val(protection_map[i]);  		protection_map[i] = __pgprot(v | user_pgprot);  	} @@ -876,6 +876,22 @@ static void __init pci_reserve_io(void)  #define pci_reserve_io() do { } while (0)  #endif +#ifdef CONFIG_DEBUG_LL +void __init debug_ll_io_init(void) +{ +	struct map_desc map; + +	debug_ll_addr(&map.pfn, &map.virtual); +	if (!map.pfn || !map.virtual) +		return; +	map.pfn = __phys_to_pfn(map.pfn); +	map.virtual &= PAGE_MASK; +	map.length = PAGE_SIZE; +	map.type = MT_DEVICE; +	create_mapping(&map); +} +#endif +  static void * __initdata vmalloc_min =  	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b29a2265af0..eb6aa73bc8b 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -167,6 +167,10 @@  	tst	r1, #L_PTE_YOUNG  	tstne	r1, #L_PTE_PRESENT  	moveq	r3, #0 +#ifndef CONFIG_CPU_USE_DOMAINS +	tstne	r1, #L_PTE_NONE +	movne	r3, #0 +#endif  	str	r3, [r0]  	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 86b8b480634..09c5233f4df 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -89,7 +89,7 @@ ENTRY(cpu_v6_dcache_clean_area)  	mov	pc, lr  /* - *	cpu_arm926_switch_mm(pgd_phys, tsk) + *	cpu_v6_switch_mm(pgd_phys, tsk)   *   *	Set the translation table base pointer to be pgd_phys   * diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index fd045e70639..6d98c13ab82 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -100,7 +100,11 @@ ENTRY(cpu_v7_set_pte_ext)  	orrne	r3, r3, #PTE_EXT_XN  	tst	r1, #L_PTE_YOUNG -	tstne	r1, #L_PTE_PRESENT +	tstne	r1, #L_PTE_VALID +#ifndef CONFIG_CPU_USE_DOMAINS +	eorne	r1, r1, #L_PTE_NONE +	tstne	r1, #L_PTE_NONE +#endif  	moveq	r3, #0   ARM(	str	r3, [r0, #2048]! ) @@ -161,11 +165,11 @@ ENDPROC(cpu_v7_set_pte_ext)  	 *  TFR   EV X F   I D LR    S  	 * .EEE ..EE PUI. 
.T.T 4RVI ZWRS BLDP WCAM  	 * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced -	 *    1    0 110       0011 1100 .111 1101 < we want +	 *   01    0 110       0011 1100 .111 1101 < we want  	 */  	.align	2  	.type	v7_crval, #object  v7_crval: -	crval	clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c +	crval	clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c  	.previous diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 8de0f1dd154..7b56386f949 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -65,8 +65,11 @@ ENDPROC(cpu_v7_switch_mm)   */  ENTRY(cpu_v7_set_pte_ext)  #ifdef CONFIG_MMU -	tst	r2, #L_PTE_PRESENT +	tst	r2, #L_PTE_VALID  	beq	1f +	tst	r3, #1 << (57 - 32)		@ L_PTE_NONE +	bicne	r2, #L_PTE_VALID +	bne	1f  	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY  	orreq	r2, #L_PTE_RDONLY  1:	strd	r2, r3, [r0] diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 846d279f317..42cc833aa02 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -57,7 +57,7 @@ ENTRY(cpu_v7_reset)   THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)  	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU  	isb -	mov	pc, r0 +	bx	r0  ENDPROC(cpu_v7_reset)  	.popsection  |