Diffstat (limited to 'arch/powerpc/kernel')
40 files changed, 688 insertions, 289 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 83afacd3ba7..bb282dd8161 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)  obj-y				+= ppc_save_regs.o  endif +obj-$(CONFIG_EPAPR_PARAVIRT)	+= epapr_paravirt.o epapr_hcalls.o  obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvm_emul.o  # Disable GCOV in odd or sensitive code diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 52c7ad78242..85b05c463fa 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -533,6 +533,7 @@ int main(void)  	HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);  	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);  	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); +	HSTATE_FIELD(HSTATE_SPRG3, sprg3);  	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);  	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);  	HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index ebc62f42a23..61f079e05b6 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -100,19 +100,19 @@ _icswx_skip_guest:  	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h  	mtspr	SPRN_MMUCR0, r4  	li	r4,A2_IERAT_SIZE-1 -	PPC_ERATWE(r4,r4,3) +	PPC_ERATWE(R4,R4,3)  	/* Now set the D-ERAT watermark to 31 */  	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h  	mtspr	SPRN_MMUCR0, r4  	li	r4,A2_DERAT_SIZE-1 -	PPC_ERATWE(r4,r4,3) +	PPC_ERATWE(R4,R4,3)  	/* And invalidate the beast just in case. That won't get rid of  	 * a bolted entry though it will be in LRU and so will go away eventually  	 * but let's not bother for now  	 */ -	PPC_ERATILX(0,0,0) +	PPC_ERATILX(0,0,R0)  1:  	blr diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index bcfdcd22c76..2d7bb8ced13 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -109,6 +109,7 @@ static u64 dma_iommu_get_required_mask(struct device *dev)  struct dma_map_ops dma_iommu_ops = {  	.alloc			= dma_iommu_alloc_coherent,  	.free			= dma_iommu_free_coherent, +	.mmap			= dma_direct_mmap_coherent,  	.map_sg			= dma_iommu_map_sg,  	.unmap_sg		= dma_iommu_unmap_sg,  	.dma_supported		= dma_iommu_dma_supported, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 4ab88dafb23..46943651da2 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -49,6 +49,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)  struct dma_map_ops swiotlb_dma_ops = {  	.alloc = dma_direct_alloc_coherent,  	.free = dma_direct_free_coherent, +	.mmap = dma_direct_mmap_coherent,  	.map_sg = swiotlb_map_sg_attrs,  	.unmap_sg = swiotlb_unmap_sg_attrs,  	.dma_supported = swiotlb_dma_supported, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1ec983dcec..355b9d84b0f 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -11,6 +11,8 @@  #include <linux/gfp.h>  #include <linux/memblock.h>  #include <linux/export.h> +#include <linux/pci.h> +#include <asm/vio.h>  #include <asm/bug.h>  #include <asm/abs_addr.h>  #include <asm/machdep.h> @@ -65,6 +67,24 @@ void dma_direct_free_coherent(struct device *dev, size_t size,  #endif  } +int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, +			     void *cpu_addr, dma_addr_t handle, size_t size, +			     struct dma_attrs *attrs) +{ +	unsigned long pfn; + +#ifdef CONFIG_NOT_COHERENT_CACHE +	
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +	pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); +#else +	pfn = page_to_pfn(virt_to_page(cpu_addr)); +#endif +	return remap_pfn_range(vma, vma->vm_start, +			       pfn + vma->vm_pgoff, +			       vma->vm_end - vma->vm_start, +			       vma->vm_page_prot); +} +  static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,  			     int nents, enum dma_data_direction direction,  			     struct dma_attrs *attrs) @@ -154,6 +174,7 @@ static inline void dma_direct_sync_single(struct device *dev,  struct dma_map_ops dma_direct_ops = {  	.alloc				= dma_direct_alloc_coherent,  	.free				= dma_direct_free_coherent, +	.mmap				= dma_direct_mmap_coherent,  	.map_sg				= dma_direct_map_sg,  	.unmap_sg			= dma_direct_unmap_sg,  	.dma_supported			= dma_direct_dma_supported, @@ -205,26 +226,15 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);  static int __init dma_init(void)  { -       dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); +	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); +#ifdef CONFIG_PCI +	dma_debug_add_bus(&pci_bus_type); +#endif +#ifdef CONFIG_IBMVIO +	dma_debug_add_bus(&vio_bus_type); +#endif         return 0;  }  fs_initcall(dma_init); -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, -		      void *cpu_addr, dma_addr_t handle, size_t size) -{ -	unsigned long pfn; - -#ifdef CONFIG_NOT_COHERENT_CACHE -	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -	pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); -#else -	pfn = page_to_pfn(virt_to_page(cpu_addr)); -#endif -	return remap_pfn_range(vma, vma->vm_start, -			       pfn + vma->vm_pgoff, -			       vma->vm_end - vma->vm_start, -			       vma->vm_page_prot); -} -EXPORT_SYMBOL_GPL(dma_mmap_coherent); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ba3aeb4bc06..ead5016b02d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -89,6 +89,10 @@ crit_transfer_to_handler:  	mfspr	r0,SPRN_SRR1  	stw	r0,_SRR1(r11) +	/* set the stack limit to the current stack +	 * and set the limit to protect the thread_info +	 * struct +	 */  	mfspr	r8,SPRN_SPRG_THREAD  	lwz	r0,KSP_LIMIT(r8)  	stw	r0,SAVED_KSP_LIMIT(r11) @@ -109,6 +113,10 @@ crit_transfer_to_handler:  	mfspr	r0,SPRN_SRR1  	stw	r0,crit_srr1@l(0) +	/* set the stack limit to the current stack +	 * and set the limit to protect the thread_info +	 * struct +	 */  	mfspr	r8,SPRN_SPRG_THREAD  	lwz	r0,KSP_LIMIT(r8)  	stw	r0,saved_ksp_limit@l(0) @@ -158,7 +166,7 @@ transfer_to_handler:  	tophys(r11,r11)  	addi	r11,r11,global_dbcr0@l  #ifdef CONFIG_SMP -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r9,TI_CPU(r9)  	slwi	r9,r9,3  	add	r11,r11,r9 @@ -179,7 +187,7 @@ transfer_to_handler:  	ble-	stack_ovf		/* then the kernel stack overflowed */  5:  #if defined(CONFIG_6xx) || defined(CONFIG_E500) -	rlwinm	r9,r1,0,0,31-THREAD_SHIFT +	CURRENT_THREAD_INFO(r9, r1)  	tophys(r9,r9)			/* check local flags */  	lwz	r12,TI_LOCAL_FLAGS(r9)  	mtcrf	0x01,r12 @@ -226,13 +234,7 @@ reenable_mmu:				/* re-enable mmu so we can */  	stw	r3,16(r1)  	stw	r4,20(r1)  	stw	r5,24(r1) -	andi.	r12,r12,MSR_PR -	b	11f -	bl	trace_hardirqs_off -	b	12f -11:  	bl	trace_hardirqs_off -12:  	lwz	r5,24(r1)  	lwz	r4,20(r1)  	lwz	r3,16(r1) @@ -333,7 +335,7 @@ _GLOBAL(DoSyscall)  	mtmsr	r11  1:  #endif /* CONFIG_TRACE_IRQFLAGS */ -	rlwinm	r10,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */ +	CURRENT_THREAD_INFO(r10, r1)  	lwz	r11,TI_FLAGS(r10)  	andi.	
r11,r11,_TIF_SYSCALL_T_OR_A  	bne-	syscall_dotrace @@ -354,7 +356,7 @@ ret_from_syscall:  	bl	do_show_syscall_exit  #endif  	mr	r6,r3 -	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */ +	CURRENT_THREAD_INFO(r12, r1)  	/* disable interrupts so current_thread_info()->flags can't change */  	LOAD_MSR_KERNEL(r10,MSR_KERNEL)	/* doesn't include MSR_EE */  	/* Note: We don't bother telling lockdep about it */ @@ -815,7 +817,7 @@ ret_from_except:  user_exc_return:		/* r10 contains MSR_KERNEL here */  	/* Check current_thread_info()->flags */ -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r9,TI_FLAGS(r9)  	andi.	r0,r9,_TIF_USER_WORK_MASK  	bne	do_work @@ -835,7 +837,7 @@ restore_user:  /* N.B. the only way to get here is from the beq following ret_from_except. */  resume_kernel:  	/* check current_thread_info->preempt_count */ -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r0,TI_PREEMPT(r9)  	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */  	bne	restore @@ -852,7 +854,7 @@ resume_kernel:  	bl	trace_hardirqs_off  #endif  1:	bl	preempt_schedule_irq -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r3,TI_FLAGS(r9)  	andi.	r0,r3,_TIF_NEED_RESCHED  	bne-	1b @@ -1122,7 +1124,7 @@ ret_from_debug_exc:  	lwz	r10,SAVED_KSP_LIMIT(r1)  	stw	r10,KSP_LIMIT(r9)  	lwz	r9,THREAD_INFO-THREAD(r9) -	rlwinm	r10,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r10, r1)  	lwz	r10,TI_PREEMPT(r10)  	stw	r10,TI_PREEMPT(r9)  	RESTORE_xSRR(SRR0,SRR1); @@ -1156,7 +1158,7 @@ load_dbcr0:  	lis	r11,global_dbcr0@ha  	addi	r11,r11,global_dbcr0@l  #ifdef CONFIG_SMP -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r9,TI_CPU(r9)  	slwi	r9,r9,3  	add	r11,r11,r9 @@ -1197,7 +1199,7 @@ recheck:  	LOAD_MSR_KERNEL(r10,MSR_KERNEL)  	SYNC  	MTMSRD(r10)		/* disable interrupts */ -	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r9,TI_FLAGS(r9)  	andi.	r0,r9,_TIF_NEED_RESCHED  	bne-	do_resched diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5971c85df13..4b01a25e29e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -146,7 +146,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)  	REST_2GPRS(7,r1)  	addi	r9,r1,STACK_FRAME_OVERHEAD  #endif -	clrrdi	r11,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r11, r1)  	ld	r10,TI_FLAGS(r11)  	andi.	r11,r10,_TIF_SYSCALL_T_OR_A  	bne-	syscall_dotrace @@ -181,7 +181,7 @@ syscall_exit:  	bl	.do_show_syscall_exit  	ld	r3,RESULT(r1)  #endif -	clrrdi	r12,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r12, r1)  	ld	r8,_MSR(r1)  #ifdef CONFIG_PPC_BOOK3S @@ -197,7 +197,16 @@ syscall_exit:  	wrteei	0  #else  	ld	r10,PACAKMSR(r13) -	mtmsrd	r10,1 +	/* +	 * For performance reasons we clear RI the same time that we +	 * clear EE. We only need to clear RI just before we restore r13 +	 * below, but batching it with EE saves us one expensive mtmsrd call. +	 * We have to be careful to restore RI if we branch anywhere from +	 * here (eg syscall_exit_work). +	 */ +	li	r9,MSR_RI +	andc	r11,r10,r9 +	mtmsrd	r11,1  #endif /* CONFIG_PPC_BOOK3E */  	ld	r9,TI_FLAGS(r12) @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	andi.	r6,r8,MSR_PR  	ld	r4,_LINK(r1) -	/* -	 * Clear RI before restoring r13.  If we are returning to -	 * userspace and we take an exception after restoring r13, -	 * we end up corrupting the userspace r13 value. 
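A note on the pattern running through the entry_32.S and entry_64.S hunks above: every open-coded rlwinm r9,r1,0,0,(31-THREAD_SHIFT) (32-bit) or clrrdi r9,r1,THREAD_SHIFT (64-bit) is replaced by one CURRENT_THREAD_INFO(dest, sp) macro. Both forms compute the same address: the kernel stack is THREAD_SIZE-aligned with struct thread_info at its base, so clearing the low THREAD_SHIFT bits of the stack pointer recovers it. A minimal C analogue of that computation (a sketch only; assumes kernel context for struct thread_info and THREAD_SIZE):

	/* thread_info sits at the bottom of the THREAD_SIZE-aligned
	 * kernel stack; masking the low bits of any in-stack address
	 * (the asm uses r1, the stack pointer) finds it. */
	static inline struct thread_info *ti_from_sp(unsigned long sp)
	{
		return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
	}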
-	 */ -#ifdef CONFIG_PPC_BOOK3S -	/* No MSR:RI on BookE */ -	li	r12,MSR_RI -	andc	r11,r10,r12 -	mtmsrd	r11,1			/* clear MSR.RI */ -#endif /* CONFIG_PPC_BOOK3S */  	beq-	1f  	ACCOUNT_CPU_USER_EXIT(r11, r12) @@ -262,7 +260,7 @@ syscall_dotrace:  	ld	r7,GPR7(r1)  	ld	r8,GPR8(r1)  	addi	r9,r1,STACK_FRAME_OVERHEAD -	clrrdi	r10,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r10, r1)  	ld	r10,TI_FLAGS(r10)  	b	.Lsyscall_dotrace_cont @@ -271,6 +269,9 @@ syscall_enosys:  	b	syscall_exit  syscall_exit_work: +#ifdef CONFIG_PPC_BOOK3S +	mtmsrd	r10,1		/* Restore RI */ +#endif  	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.  	 If TIF_NOERROR is set, just save r3 as it is. */ @@ -499,7 +500,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  2:  #endif /* !CONFIG_PPC_BOOK3S */ -	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */ +	CURRENT_THREAD_INFO(r7, r8)  /* base of new stack */  	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE  	   because we don't need to leave the 288-byte ABI gap at the  	   top of the kernel stack. */ @@ -558,7 +559,7 @@ _GLOBAL(ret_from_except_lite)  	mtmsrd	r10,1		  /* Update machine state */  #endif /* CONFIG_PPC_BOOK3E */ -	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */ +	CURRENT_THREAD_INFO(r9, r1)  	ld	r3,_MSR(r1)  	ld	r4,TI_FLAGS(r9)  	andi.	r3,r3,MSR_PR @@ -601,7 +602,7 @@ resume_kernel:  1:	bl	.preempt_schedule_irq  	/* Re-test flags and eventually loop */ -	clrrdi	r9,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r9, r1)  	ld	r4,TI_FLAGS(r9)  	andi.	r0,r4,_TIF_NEED_RESCHED  	bne	1b diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S new file mode 100644 index 00000000000..697b390ebfd --- /dev/null +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ +.global epapr_hypercall_start +epapr_hypercall_start: +	li	r3, -1 +	nop +	nop +	nop +	blr diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c new file mode 100644 index 00000000000..028aeae370b --- /dev/null +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -0,0 +1,52 @@ +/* + * ePAPR para-virtualization support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. + * + * Copyright (C) 2012 Freescale Semiconductor, Inc. 
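The new epapr_hcalls.S above defines epapr_hypercall_start as a stub (li r3,-1, three nops, blr) that epapr_paravirt_init(), in the file that follows, overwrites with up to four instructions read from the /hypervisor node's hcall-instructions property. Callers then branch straight to the stub with arguments in GPRs, which is exactly what the reworked kvm_hypercall() later in this diff does. A hedged sketch of such a call site for a zero-argument hypercall, modeled on that kvm_hypercall() (ePAPR convention: hypercall number in r11, arguments and results in r3-r10; the clobber list is an assumption mirroring that code, not a quoted API):

	static unsigned long epapr_hcall0(unsigned long nr)
	{
		register unsigned long r11 asm("r11") = nr;
		register unsigned long r3 asm("r3");

		/* Branch to the patched hypercall page; status in r3. */
		asm volatile("bl	epapr_hypercall_start"
			     : "=r"(r3), "+r"(r11)
			     :
			     : "r0", "r4", "r5", "r6", "r7", "r8", "r9",
			       "r10", "r12", "xer", "ctr", "lr", "cc",
			       "memory");
		return r3;
	}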
+ */ + +#include <linux/of.h> +#include <asm/epapr_hcalls.h> +#include <asm/cacheflush.h> +#include <asm/code-patching.h> + +bool epapr_paravirt_enabled; + +static int __init epapr_paravirt_init(void) +{ +	struct device_node *hyper_node; +	const u32 *insts; +	int len, i; + +	hyper_node = of_find_node_by_path("/hypervisor"); +	if (!hyper_node) +		return -ENODEV; + +	insts = of_get_property(hyper_node, "hcall-instructions", &len); +	if (!insts) +		return -ENODEV; + +	if (len % 4 || len > (4 * 4)) +		return -ENODEV; + +	for (i = 0; i < (len / 4); i++) +		patch_instruction(epapr_hypercall_start + i, insts[i]); + +	epapr_paravirt_enabled = true; + +	return 0; +} + +early_initcall(epapr_paravirt_init); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7215cc2495d..98be7f0cd22 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -222,7 +222,7 @@ exc_##n##_bad_stack:							    \   * interrupts happen before the wait instruction.   */  #define CHECK_NAPPING()							\ -	clrrdi	r11,r1,THREAD_SHIFT;					\ +	CURRENT_THREAD_INFO(r11, r1);					\  	ld	r10,TI_LOCAL_FLAGS(r11);				\  	andi.	r9,r10,_TLF_NAPPING;					\  	beq+	1f;							\ @@ -903,7 +903,7 @@ skpinv:	addi	r6,r6,1				/* Increment */  	bne	1b				/* If not, repeat */  	/* Invalidate all TLBs */ -	PPC_TLBILX_ALL(0,0) +	PPC_TLBILX_ALL(0,R0)  	sync  	isync @@ -961,7 +961,7 @@ skpinv:	addi	r6,r6,1				/* Increment */  	tlbwe  	/* Invalidate TLB1 */ -	PPC_TLBILX_ALL(0,0) +	PPC_TLBILX_ALL(0,R0)  	sync  	isync @@ -1020,7 +1020,7 @@ skpinv:	addi	r6,r6,1				/* Increment */  	tlbwe  	/* Invalidate TLB1 */ -	PPC_TLBILX_ALL(0,0) +	PPC_TLBILX_ALL(0,R0)  	sync  	isync @@ -1138,7 +1138,7 @@ a2_tlbinit_after_iprot_flush:  	tlbwe  #endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ -	PPC_TLBILX(0,0,0) +	PPC_TLBILX(0,0,R0)  	sync  	isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1c06d297154..e894515e77b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -239,6 +239,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)  	 * out of line to handle them  	 */  	. = 0xe00 +hv_exception_trampoline:  	b	h_data_storage_hv  	. = 0xe20  	b	h_instr_storage_hv @@ -851,7 +852,7 @@ BEGIN_FTR_SECTION  	bne-	do_ste_alloc		/* If so handle it */  END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) -	clrrdi	r11,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r11, r1)  	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */  	andis.	r0,r0,NMI_MASK@h	/* (i.e. 
an irq when soft-disabled) */  	bne	77f			/* then don't call hash_page now */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index de369558bf0..e0ada05f2df 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -26,7 +26,7 @@  #include <asm/ptrace.h>  #ifdef CONFIG_VSX -#define REST_32FPVSRS(n,c,base)						\ +#define __REST_32FPVSRS(n,c,base)					\  BEGIN_FTR_SECTION							\  	b	2f;							\  END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\ @@ -35,7 +35,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\  2:	REST_32VSRS(n,c,base);						\  3: -#define SAVE_32FPVSRS(n,c,base)						\ +#define __SAVE_32FPVSRS(n,c,base)					\  BEGIN_FTR_SECTION							\  	b	2f;							\  END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\ @@ -44,9 +44,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\  2:	SAVE_32VSRS(n,c,base);						\  3:  #else -#define REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base) -#define SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base) +#define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base) +#define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)  #endif +#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) +#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)  /*   * This task wants to use the FPU now. @@ -79,7 +81,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	beq	1f  	toreal(r4)  	addi	r4,r4,THREAD		/* want last_task_used_math->thread */ -	SAVE_32FPVSRS(0, r5, r4) +	SAVE_32FPVSRS(0, R5, R4)  	mffs	fr0  	stfd	fr0,THREAD_FPSCR(r4)  	PPC_LL	r5,PT_REGS(r4) @@ -106,7 +108,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif  	lfd	fr0,THREAD_FPSCR(r5)  	MTFSF_L(fr0) -	REST_32FPVSRS(0, r4, r5) +	REST_32FPVSRS(0, R4, R5)  #ifndef CONFIG_SMP  	subi	r4,r5,THREAD  	fromreal(r4) @@ -140,7 +142,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)  	addi	r3,r3,THREAD	        /* want THREAD of task */  	PPC_LL	r5,PT_REGS(r3)  	PPC_LCMPI	0,r5,0 -	SAVE_32FPVSRS(0, r4 ,r3) +	SAVE_32FPVSRS(0, R4 ,R3)  	mffs	fr0  	stfd	fr0,THREAD_FPSCR(r3)  	beq	1f diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index bf99cfa6bbf..1fb78561096 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)  		return -EINVAL;  	/* replace the text with the new text */ -	if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE)) +	if (patch_instruction((unsigned int *)ip, new))  		return -EPERM; -	flush_icache_range(ip, ip + 8); -  	return 0;  } @@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod,  	 */  	op = 0x48000008;	/* b +8 */ -	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) +	if (patch_instruction((unsigned int *)ip, op))  		return -EPERM; - -	flush_icache_range(ip, ip + 8); -  	return 0;  } @@ -245,9 +240,9 @@ __ftrace_make_nop(struct module *mod,  	/*  	 * On PPC32 the trampoline looks like: -	 *  0x3d, 0x60, 0x00, 0x00  lis r11,sym@ha -	 *  0x39, 0x6b, 0x00, 0x00  addi r11,r11,sym@l -	 *  0x7d, 0x69, 0x03, 0xa6  mtctr r11 +	 *  0x3d, 0x80, 0x00, 0x00  lis r12,sym@ha +	 *  0x39, 0x8c, 0x00, 0x00  addi r12,r12,sym@l +	 *  0x7d, 0x89, 0x03, 0xa6  mtctr r12  	 *  0x4e, 0x80, 0x04, 0x20  bctr  	 */ @@ -262,9 +257,9 @@ __ftrace_make_nop(struct module *mod,  	pr_devel(" %08x %08x ", jmp[0], jmp[1]);  	/* verify that this is what we expect it to be */ -	if (((jmp[0] & 0xffff0000) != 0x3d600000) || -	    ((jmp[1] & 0xffff0000) != 0x396b0000) || -	    (jmp[2] != 0x7d6903a6) || +	if (((jmp[0] & 0xffff0000) != 0x3d800000) || +	    ((jmp[1] & 0xffff0000) != 
0x398c0000) || +	    (jmp[2] != 0x7d8903a6) ||  	    (jmp[3] != 0x4e800420)) {  		printk(KERN_ERR "Not a trampoline\n");  		return -EINVAL; @@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod,  	op = PPC_INST_NOP; -	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) +	if (patch_instruction((unsigned int *)ip, op))  		return -EPERM; -	flush_icache_range(ip, ip + 8); -  	return 0;  }  #endif /* PPC64 */ @@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	pr_devel("write to %lx\n", rec->ip); -	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) +	if (patch_instruction((unsigned int *)ip, op))  		return -EPERM; -	flush_icache_range(ip, ip + 8); -  	return 0;  }  #endif /* CONFIG_PPC64 */ @@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func)  	return ret;  } +static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) +{ +	unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR; +	int ret; + +	ret = ftrace_update_record(rec, enable); + +	switch (ret) { +	case FTRACE_UPDATE_IGNORE: +		return 0; +	case FTRACE_UPDATE_MAKE_CALL: +		return ftrace_make_call(rec, ftrace_addr); +	case FTRACE_UPDATE_MAKE_NOP: +		return ftrace_make_nop(NULL, rec, ftrace_addr); +	} + +	return 0; +} + +void ftrace_replace_code(int enable) +{ +	struct ftrace_rec_iter *iter; +	struct dyn_ftrace *rec; +	int ret; + +	for (iter = ftrace_rec_iter_start(); iter; +	     iter = ftrace_rec_iter_next(iter)) { +		rec = ftrace_rec_iter_record(iter); +		ret = __ftrace_replace_code(rec, enable); +		if (ret) { +			ftrace_bug(ret, rec->ip); +			return; +		} +	} +} + +void arch_ftrace_update_code(int command) +{ +	if (command & FTRACE_UPDATE_CALLS) +		ftrace_replace_code(1); +	else if (command & FTRACE_DISABLE_CALLS) +		ftrace_replace_code(0); + +	if (command & FTRACE_UPDATE_TRACE_FUNC) +		ftrace_update_ftrace_func(ftrace_trace_function); + +	if (command & FTRACE_START_FUNC_RET) +		ftrace_enable_ftrace_graph_caller(); +	else if (command & FTRACE_STOP_FUNC_RET) +		ftrace_disable_ftrace_graph_caller(); +} +  int __init ftrace_dyn_arch_init(void *data)  {  	/* caller expects data to be zero */ @@ -587,18 +630,17 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)  		return;  	} -	if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) { -		*parent = old; -		return; -	} -  	trace.func = self_addr; +	trace.depth = current->curr_ret_stack + 1;  	/* Only trace if the calling function expects to */  	if (!ftrace_graph_entry(&trace)) { -		current->curr_ret_stack--;  		*parent = old; +		return;  	} + +	if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) +		*parent = old;  }  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 1f4434a3860..0f59863c3ad 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -192,7 +192,7 @@ _ENTRY(__early_start)  	li	r0,0  	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) -	rlwinm  r22,r1,0,0,31-THREAD_SHIFT      /* current thread_info */ +	CURRENT_THREAD_INFO(r22, r1)  	stw	r24, TI_CPU(r22)  	bl	early_init @@ -556,8 +556,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)  	/* SPE Unavailable */  	START_EXCEPTION(SPEUnavailable)  	NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) -	bne	load_up_spe -	addi	r3,r1,STACK_FRAME_OVERHEAD +	beq	1f +	bl	load_up_spe +	b	fast_exception_return +1:	addi	r3,r1,STACK_FRAME_OVERHEAD  	EXC_XFER_EE_LITE(0x2010, KernelSPE)  #else  	
EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ @@ -778,7 +780,7 @@ tlb_write_entry:  /* Note that the SPE support is closely modeled after the AltiVec   * support.  Changes to one are likely to be applicable to the   * other!  */ -load_up_spe: +_GLOBAL(load_up_spe)  /*   * Disable SPE for the task which had SPE previously,   * and save its SPE registers in its thread_struct. @@ -826,20 +828,7 @@ load_up_spe:  	subi	r4,r5,THREAD  	stw	r4,last_task_used_spe@l(r3)  #endif /* !CONFIG_SMP */ -	/* restore registers and return */ -2:	REST_4GPRS(3, r11) -	lwz	r10,_CCR(r11) -	REST_GPR(1, r11) -	mtcr	r10 -	lwz	r10,_LINK(r11) -	mtlr	r10 -	REST_GPR(10, r11) -	mtspr	SPRN_SRR1,r9 -	mtspr	SPRN_SRR0,r12 -	REST_GPR(9, r11) -	REST_GPR(12, r11) -	lwz	r11,GPR11(r11) -	rfi +	blr  /*   * SPE unavailable trap from kernel - print a message, but let diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2bc0584be81..f3a82dde61d 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -111,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)  	 * and the single_step_dabr_instruction(), then cleanup the breakpoint  	 * restoration variables to prevent dangling pointers.  	 */ -	if (bp->ctx->task) +	if (bp->ctx && bp->ctx->task)  		bp->ctx->task->thread.last_hit_ubp = NULL;  } diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 15c611de1ee..1686916cc7f 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -135,7 +135,7 @@ BEGIN_FTR_SECTION  	DSSALL  	sync  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -	rlwinm	r9,r1,0,0,31-THREAD_SHIFT	/* current thread_info */ +	CURRENT_THREAD_INFO(r9, r1)  	lwz	r8,TI_LOCAL_FLAGS(r9)	/* set napping bit */  	ori	r8,r8,_TLF_NAPPING	/* so when we take an exception */  	stw	r8,TI_LOCAL_FLAGS(r9)	/* it will return to our caller */ @@ -158,7 +158,7 @@ _GLOBAL(power_save_ppc32_restore)  	stw	r9,_NIP(r11)		/* make it do a blr */  #ifdef CONFIG_SMP -	rlwinm	r12,r11,0,0,31-THREAD_SHIFT +	CURRENT_THREAD_INFO(r12, r11)  	lwz	r11,TI_CPU(r12)		/* get cpu number * 4 */  	slwi	r11,r11,2  #else diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index ff007b59448..4c7cb400858 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -60,7 +60,7 @@ _GLOBAL(book3e_idle)  1:	/* Let's set the _TLF_NAPPING flag so interrupts make us return  	 * to the right spot  	*/ -	clrrdi	r11,r1,THREAD_SHIFT +	CURRENT_THREAD_INFO(r11, r1)  	ld	r10,TI_LOCAL_FLAGS(r11)  	ori	r10,r10,_TLF_NAPPING  	std	r10,TI_LOCAL_FLAGS(r11) diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 4f0ab85f378..15448668988 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -21,7 +21,7 @@  	.text  _GLOBAL(e500_idle) -	rlwinm	r3,r1,0,0,31-THREAD_SHIFT	/* current thread_info */ +	CURRENT_THREAD_INFO(r3, r1)  	lwz	r4,TI_LOCAL_FLAGS(r3)	/* set napping bit */  	ori	r4,r4,_TLF_NAPPING	/* so when we take an exception */  	stw	r4,TI_LOCAL_FLAGS(r3)	/* it will return to our caller */ @@ -96,7 +96,7 @@ _GLOBAL(power_save_ppc32_restore)  	stw	r9,_NIP(r11)		/* make it do a blr */  #ifdef CONFIG_SMP -	rlwinm	r12,r1,0,0,31-THREAD_SHIFT +	CURRENT_THREAD_INFO(r12, r1)  	lwz	r11,TI_CPU(r12)		/* get cpu number * 4 */  	slwi	r11,r11,2  #else diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 2c71b0fc9f9..e3edaa18991 100644 --- 
a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -59,7 +59,7 @@ BEGIN_FTR_SECTION  	DSSALL  	sync  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -	clrrdi	r9,r1,THREAD_SHIFT	/* current thread_info */ +	CURRENT_THREAD_INFO(r9, r1)  	ld	r8,TI_LOCAL_FLAGS(r9)	/* set napping bit */  	ori	r8,r8,_TLF_NAPPING	/* so when we take an exception */  	std	r8,TI_LOCAL_FLAGS(r9)	/* it will return to our caller */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 359f078571c..ff5a6ce027b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -33,6 +33,9 @@  #include <linux/bitmap.h>  #include <linux/iommu-helper.h>  #include <linux/crash_dump.h> +#include <linux/hash.h> +#include <linux/fault-inject.h> +#include <linux/pci.h>  #include <asm/io.h>  #include <asm/prom.h>  #include <asm/iommu.h> @@ -40,6 +43,7 @@  #include <asm/machdep.h>  #include <asm/kdump.h>  #include <asm/fadump.h> +#include <asm/vio.h>  #define DBG(...) @@ -58,6 +62,114 @@ static int __init setup_iommu(char *str)  __setup("iommu=", setup_iommu); +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); + +/* + * We precalculate the hash to avoid doing it on every allocation. + * + * The hash is important to spread CPUs across all the pools. For example, + * on a POWER7 with 4 way SMT we want interrupts on the primary threads and + * with 4 pools all primary threads would map to the same pool. + */ +static int __init setup_iommu_pool_hash(void) +{ +	unsigned int i; + +	for_each_possible_cpu(i) +		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); + +	return 0; +} +subsys_initcall(setup_iommu_pool_hash); + +#ifdef CONFIG_FAIL_IOMMU + +static DECLARE_FAULT_ATTR(fail_iommu); + +static int __init setup_fail_iommu(char *str) +{ +	return setup_fault_attr(&fail_iommu, str); +} +__setup("fail_iommu=", setup_fail_iommu); + +static bool should_fail_iommu(struct device *dev) +{ +	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1); +} + +static int __init fail_iommu_debugfs(void) +{ +	struct dentry *dir = fault_create_debugfs_attr("fail_iommu", +						       NULL, &fail_iommu); + +	return IS_ERR(dir) ? PTR_ERR(dir) : 0; +} +late_initcall(fail_iommu_debugfs); + +static ssize_t fail_iommu_show(struct device *dev, +			       struct device_attribute *attr, char *buf) +{ +	return sprintf(buf, "%d\n", dev->archdata.fail_iommu); +} + +static ssize_t fail_iommu_store(struct device *dev, +				struct device_attribute *attr, const char *buf, +				size_t count) +{ +	int i; + +	if (count > 0 && sscanf(buf, "%d", &i) > 0) +		dev->archdata.fail_iommu = (i == 0) ? 
0 : 1; + +	return count; +} + +static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show, +		   fail_iommu_store); + +static int fail_iommu_bus_notify(struct notifier_block *nb, +				 unsigned long action, void *data) +{ +	struct device *dev = data; + +	if (action == BUS_NOTIFY_ADD_DEVICE) { +		if (device_create_file(dev, &dev_attr_fail_iommu)) +			pr_warn("Unable to create IOMMU fault injection sysfs " +				"entries\n"); +	} else if (action == BUS_NOTIFY_DEL_DEVICE) { +		device_remove_file(dev, &dev_attr_fail_iommu); +	} + +	return 0; +} + +static struct notifier_block fail_iommu_bus_notifier = { +	.notifier_call = fail_iommu_bus_notify +}; + +static int __init fail_iommu_setup(void) +{ +#ifdef CONFIG_PCI +	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); +#endif +#ifdef CONFIG_IBMVIO +	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); +#endif + +	return 0; +} +/* + * Must execute after PCI and VIO subsystem have initialised but before + * devices are probed. + */ +arch_initcall(fail_iommu_setup); +#else +static inline bool should_fail_iommu(struct device *dev) +{ +	return false; +} +#endif +  static unsigned long iommu_range_alloc(struct device *dev,  				       struct iommu_table *tbl,                                         unsigned long npages, @@ -71,6 +183,9 @@ static unsigned long iommu_range_alloc(struct device *dev,  	int pass = 0;  	unsigned long align_mask;  	unsigned long boundary_size; +	unsigned long flags; +	unsigned int pool_nr; +	struct iommu_pool *pool;  	align_mask = 0xffffffffffffffffl >> (64 - align_order); @@ -83,36 +198,49 @@ static unsigned long iommu_range_alloc(struct device *dev,  		return DMA_ERROR_CODE;  	} -	if (handle && *handle) -		start = *handle; +	if (should_fail_iommu(dev)) +		return DMA_ERROR_CODE; + +	/* +	 * We don't need to disable preemption here because any CPU can +	 * safely use any IOMMU pool. +	 */ +	pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + +	if (largealloc) +		pool = &(tbl->large_pool);  	else -		start = largealloc ? tbl->it_largehint : tbl->it_hint; +		pool = &(tbl->pools[pool_nr]); -	/* Use only half of the table for small allocs (15 pages or less) */ -	limit = largealloc ? tbl->it_size : tbl->it_halfpoint; +	spin_lock_irqsave(&(pool->lock), flags); -	if (largealloc && start < tbl->it_halfpoint) -		start = tbl->it_halfpoint; +again: +	if ((pass == 0) && handle && *handle) +		start = *handle; +	else +		start = pool->hint; + +	limit = pool->end;  	/* The case below can happen if we have a small segment appended  	 * to a large, or when the previous alloc was at the very end of  	 * the available space. If so, go back to the initial start.  	 */  	if (start >= limit) -		start = largealloc ? tbl->it_largehint : tbl->it_hint; - - again: +		start = pool->start;  	if (limit + tbl->it_offset > mask) {  		limit = mask - tbl->it_offset + 1;  		/* If we're constrained on address range, first try  		 * at the masked hint to avoid O(n) search complexity, -		 * but on second pass, start at 0. +		 * but on second pass, start at 0 in pool 0.  		 
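The iommu.c rework in the hunks around this point replaces the table's single it_lock/it_hint pair with up to IOMMU_NR_POOLS independently locked sub-ranges plus one large pool in the top quarter of the table. Each CPU is steered to a home pool by a precomputed hash_32() of its id and only spills into neighbouring pools once its own is exhausted, so concurrent mappings from different CPUs rarely contend on one lock. A stripped-down model of the allocation scheme (a sketch with invented names, not the kernel API; the real code also resets the hint on first retry and handles the large pool, address-limit masks and the device hint):

	struct toy_pool {
		spinlock_t lock;
		unsigned long start, end, hint;	/* window into the bitmap */
	};

	/* One shared bitmap, nr_pools windows, per-CPU pool selection.
	 * nr_pools must be a power of two. */
	static unsigned long toy_alloc(struct toy_pool *pools,
				       unsigned int nr_pools,
				       unsigned long *bitmap,
				       unsigned int cpu,
				       unsigned long npages)
	{
		unsigned int nr = hash_32(cpu, ilog2(nr_pools));
		unsigned long flags, n;
		unsigned int tries;

		for (tries = 0; tries < nr_pools; tries++) {
			struct toy_pool *p = &pools[nr];

			spin_lock_irqsave(&p->lock, flags);
			n = bitmap_find_next_zero_area(bitmap, p->end,
						       p->hint, npages, 0);
			if (n < p->end) {		/* found a run */
				bitmap_set(bitmap, n, npages);
				p->hint = n + npages;
				spin_unlock_irqrestore(&p->lock, flags);
				return n;
			}
			spin_unlock_irqrestore(&p->lock, flags);
			nr = (nr + 1) & (nr_pools - 1);	/* spill over */
		}
		return -1UL;				/* table full */
	}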
*/ -		if ((start & mask) >= limit || pass > 0) -			start = 0; -		else +		if ((start & mask) >= limit || pass > 0) { +			pool = &(tbl->pools[0]); +			start = pool->start; +		} else {  			start &= mask; +		}  	}  	if (dev) @@ -126,16 +254,25 @@ static unsigned long iommu_range_alloc(struct device *dev,  			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,  			     align_mask);  	if (n == -1) { -		if (likely(pass < 2)) { -			/* First failure, just rescan the half of the table. -			 * Second failure, rescan the other half of the table. -			 */ -			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; -			limit = pass ? tbl->it_size : limit; +		if (likely(pass == 0)) { +			/* First try the pool from the start */ +			pool->hint = pool->start;  			pass++;  			goto again; + +		} else if (pass <= tbl->nr_pools) { +			/* Now try scanning all the other pools */ +			spin_unlock(&(pool->lock)); +			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1); +			pool = &tbl->pools[pool_nr]; +			spin_lock(&(pool->lock)); +			pool->hint = pool->start; +			pass++; +			goto again; +  		} else { -			/* Third failure, give up */ +			/* Give up */ +			spin_unlock_irqrestore(&(pool->lock), flags);  			return DMA_ERROR_CODE;  		}  	} @@ -145,10 +282,10 @@ static unsigned long iommu_range_alloc(struct device *dev,  	/* Bump the hint to a new block for small allocs. */  	if (largealloc) {  		/* Don't bump to new block to avoid fragmentation */ -		tbl->it_largehint = end; +		pool->hint = end;  	} else {  		/* Overflow will be taken care of at the next allocation */ -		tbl->it_hint = (end + tbl->it_blocksize - 1) & +		pool->hint = (end + tbl->it_blocksize - 1) &  		                ~(tbl->it_blocksize - 1);  	} @@ -156,6 +293,8 @@ static unsigned long iommu_range_alloc(struct device *dev,  	if (handle)  		*handle = end; +	spin_unlock_irqrestore(&(pool->lock), flags); +  	return n;  } @@ -165,18 +304,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  			      unsigned long mask, unsigned int align_order,  			      struct dma_attrs *attrs)  { -	unsigned long entry, flags; +	unsigned long entry;  	dma_addr_t ret = DMA_ERROR_CODE;  	int build_fail; -	spin_lock_irqsave(&(tbl->it_lock), flags); -  	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); -	if (unlikely(entry == DMA_ERROR_CODE)) { -		spin_unlock_irqrestore(&(tbl->it_lock), flags); +	if (unlikely(entry == DMA_ERROR_CODE))  		return DMA_ERROR_CODE; -	}  	entry += tbl->it_offset;	/* Offset into real TCE table */  	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */ @@ -193,8 +328,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  	 */  	if (unlikely(build_fail)) {  		__iommu_free(tbl, ret, npages); - -		spin_unlock_irqrestore(&(tbl->it_lock), flags);  		return DMA_ERROR_CODE;  	} @@ -202,16 +335,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  	if (ppc_md.tce_flush)  		ppc_md.tce_flush(tbl); -	spin_unlock_irqrestore(&(tbl->it_lock), flags); -  	/* Make sure updates are seen by hardware */  	mb();  	return ret;  } -static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,  -			 unsigned int npages) +static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, +			     unsigned int npages)  {  	unsigned long entry, free_entry; @@ -231,20 +362,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,  			printk(KERN_INFO "\tindex     = 0x%llx\n", (u64)tbl->it_index);  			WARN_ON(1);  		} -		return; + +		
return false; +	} + +	return true; +} + +static struct iommu_pool *get_pool(struct iommu_table *tbl, +				   unsigned long entry) +{ +	struct iommu_pool *p; +	unsigned long largepool_start = tbl->large_pool.start; + +	/* The large pool is the last pool at the top of the table */ +	if (entry >= largepool_start) { +		p = &tbl->large_pool; +	} else { +		unsigned int pool_nr = entry / tbl->poolsize; + +		BUG_ON(pool_nr > tbl->nr_pools); +		p = &tbl->pools[pool_nr];  	} +	return p; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, +			 unsigned int npages) +{ +	unsigned long entry, free_entry; +	unsigned long flags; +	struct iommu_pool *pool; + +	entry = dma_addr >> IOMMU_PAGE_SHIFT; +	free_entry = entry - tbl->it_offset; + +	pool = get_pool(tbl, free_entry); + +	if (!iommu_free_check(tbl, dma_addr, npages)) +		return; +  	ppc_md.tce_free(tbl, entry, npages); + +	spin_lock_irqsave(&(pool->lock), flags);  	bitmap_clear(tbl->it_map, free_entry, npages); +	spin_unlock_irqrestore(&(pool->lock), flags);  }  static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,  		unsigned int npages)  { -	unsigned long flags; - -	spin_lock_irqsave(&(tbl->it_lock), flags); -  	__iommu_free(tbl, dma_addr, npages);  	/* Make sure TLB cache is flushed if the HW needs it. We do @@ -253,8 +421,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,  	 */  	if (ppc_md.tce_flush)  		ppc_md.tce_flush(tbl); - -	spin_unlock_irqrestore(&(tbl->it_lock), flags);  }  int iommu_map_sg(struct device *dev, struct iommu_table *tbl, @@ -263,7 +429,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		 struct dma_attrs *attrs)  {  	dma_addr_t dma_next = 0, dma_addr; -	unsigned long flags;  	struct scatterlist *s, *outs, *segstart;  	int outcount, incount, i, build_fail = 0;  	unsigned int align; @@ -285,8 +450,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  	DBG("sg mapping %d elements:\n", nelems); -	spin_lock_irqsave(&(tbl->it_lock), flags); -  	max_seg_size = dma_get_max_seg_size(dev);  	for_each_sg(sglist, s, nelems, i) {  		unsigned long vaddr, npages, entry, slen; @@ -369,8 +532,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  	if (ppc_md.tce_flush)  		ppc_md.tce_flush(tbl); -	spin_unlock_irqrestore(&(tbl->it_lock), flags); -  	DBG("mapped %d elements:\n", outcount);  	/* For the sake of iommu_unmap_sg, we clear out the length in the @@ -402,7 +563,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		if (s == outs)  			break;  	} -	spin_unlock_irqrestore(&(tbl->it_lock), flags);  	return 0;  } @@ -412,15 +572,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,  		struct dma_attrs *attrs)  {  	struct scatterlist *sg; -	unsigned long flags;  	BUG_ON(direction == DMA_NONE);  	if (!tbl)  		return; -	spin_lock_irqsave(&(tbl->it_lock), flags); -  	sg = sglist;  	while (nelems--) {  		unsigned int npages; @@ -440,8 +597,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,  	 */  	if (ppc_md.tce_flush)  		ppc_md.tce_flush(tbl); - -	spin_unlock_irqrestore(&(tbl->it_lock), flags);  }  static void iommu_table_clear(struct iommu_table *tbl) @@ -494,9 +649,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)  	unsigned long sz;  	static int welcomed = 0;  	struct page *page; - -	/* Set aside 1/4 of the table for large allocations. 
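Stepping back to the ftrace.c hunks earlier in this diff: each probe_kernel_write() plus flush_icache_range() pair collapses into a single patch_instruction() call, which performs the store and the instruction-cache maintenance itself and returns nonzero on failure. A sketch of the invariant such a helper has to preserve (hypothetical name; the real patch_instruction() comes from asm/code-patching.h):

	/* Code patching on powerpc: the new instruction must be pushed
	 * out of the data cache and the stale copy evicted from the
	 * instruction cache before any CPU may execute it. */
	static int patch_insn_sketch(unsigned int *addr, unsigned int instr)
	{
		if (probe_kernel_write(addr, &instr, sizeof(instr)))
			return -EPERM;		/* target not writable */
		flush_icache_range((unsigned long)addr,
				   (unsigned long)addr + sizeof(instr));
		return 0;
	}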
*/ -	tbl->it_halfpoint = tbl->it_size * 3 / 4; +	unsigned int i; +	struct iommu_pool *p;  	/* number of bytes needed for the bitmap */  	sz = (tbl->it_size + 7) >> 3; @@ -515,9 +669,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)  	if (tbl->it_offset == 0)  		set_bit(0, tbl->it_map); -	tbl->it_hint = 0; -	tbl->it_largehint = tbl->it_halfpoint; -	spin_lock_init(&tbl->it_lock); +	/* We only split the IOMMU table if we have 1GB or more of space */ +	if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) +		tbl->nr_pools = IOMMU_NR_POOLS; +	else +		tbl->nr_pools = 1; + +	/* We reserve the top 1/4 of the table for large allocations */ +	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools; + +	for (i = 0; i < tbl->nr_pools; i++) { +		p = &tbl->pools[i]; +		spin_lock_init(&(p->lock)); +		p->start = tbl->poolsize * i; +		p->hint = p->start; +		p->end = p->start + tbl->poolsize; +	} + +	p = &tbl->large_pool; +	spin_lock_init(&(p->lock)); +	p->start = tbl->poolsize * i; +	p->hint = p->start; +	p->end = tbl->it_size;  	iommu_table_clear(tbl); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 62bdf238966..867db1de894 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -31,6 +31,7 @@  #include <asm/cacheflush.h>  #include <asm/disassemble.h>  #include <asm/ppc-opcode.h> +#include <asm/epapr_hcalls.h>  #define KVM_MAGIC_PAGE		(-4096L)  #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) @@ -302,7 +303,7 @@ static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)  	if (imm_one) {  		p[kvm_emulate_wrtee_reg_offs] = -			KVM_INST_LI | __PPC_RT(30) | MSR_EE; +			KVM_INST_LI | __PPC_RT(R30) | MSR_EE;  	} else {  		/* Make clobbered registers work too */  		switch (get_rt(rt)) { @@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in,  	unsigned long register r11 asm("r11") = nr;  	unsigned long register r12 asm("r12"); -	asm volatile("bl	kvm_hypercall_start" +	asm volatile("bl	epapr_hypercall_start"  		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),  		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),  		       "=r"(r12) @@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in,  }  EXPORT_SYMBOL_GPL(kvm_hypercall); -static int kvm_para_setup(void) -{ -	extern u32 kvm_hypercall_start; -	struct device_node *hyper_node; -	u32 *insts; -	int len, i; - -	hyper_node = of_find_node_by_path("/hypervisor"); -	if (!hyper_node) -		return -1; - -	insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); -	if (len % 4) -		return -1; -	if (len > (4 * 4)) -		return -1; - -	for (i = 0; i < (len / 4); i++) -		kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); - -	return 0; -} -  static __init void kvm_free_tmp(void)  {  	unsigned long start, end; @@ -791,7 +769,7 @@ static int __init kvm_guest_init(void)  	if (!kvm_para_available())  		goto free_tmp; -	if (kvm_para_setup()) +	if (!epapr_paravirt_enabled)  		goto free_tmp;  	if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index e291cf3cf95..e100ff324a8 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -24,16 +24,6 @@  #include <asm/page.h>  #include <asm/asm-offsets.h> -/* Hypercall entry point. Will be patched with device tree instructions. 
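The uppercase R4/R5-style macro arguments appearing throughout the asm hunks (cpu_setup_a2.S, fpu.S, misc_64.S, exceptions-64e.S) rely on the wrapper pattern visible in fpu.S above: the outer macro pastes __REG_ onto its argument before handing it to the raw encoder, so only the sanctioned register symbols expand and a bare number or lowercase name breaks the build. A plain-C analogy of that token-pasting check (illustrative definitions only, not the kernel's actual macros):

	/* Token pasting as a compile-time argument filter. */
	#define __REG_R4 4
	#define __REG_R5 5
	#define ENCODE_REG(r) (__REG_##r)

	static int ok = ENCODE_REG(R4);	/* expands to (__REG_R4), i.e. 4 */
	/* ENCODE_REG(4) would expand to (__REG_4) and fail to compile. */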
*/ - -.global kvm_hypercall_start -kvm_hypercall_start: -	li	r3, -1 -	nop -	nop -	nop -	blr -  #define KVM_MAGIC_PAGE		(-4096)  #ifdef CONFIG_64BIT @@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len:  	.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_SAFE_BITS (MSR_EE | MSR_RI)  #define MSR_CRITICAL_BITS ~MSR_SAFE_BITS  .global kvm_emulate_mtmsr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 386d57f66f2..407e293aad2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -179,7 +179,7 @@ _GLOBAL(low_choose_750fx_pll)  	mtspr	SPRN_HID1,r4  	/* Store new HID1 image */ -	rlwinm	r6,r1,0,0,(31-THREAD_SHIFT) +	CURRENT_THREAD_INFO(r6, r1)  	lwz	r6,TI_CPU(r6)  	slwi	r6,r6,2  	addis	r6,r6,nap_save_hid1@ha @@ -699,7 +699,7 @@ _GLOBAL(kernel_thread)  #ifdef CONFIG_SMP  _GLOBAL(start_secondary_resume)  	/* Reset stack */ -	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */ +	CURRENT_THREAD_INFO(r1, r1)  	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD  	li	r3,0  	stw	r3,0(r1)		/* Zero the stack frame pointer	*/ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 616921ef143..565b78625a3 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -301,11 +301,6 @@ _GLOBAL(real_writeb)  #ifdef CONFIG_PPC_PASEMI -/* No support in all binutils for these yet, so use defines */ -#define LBZCIX(RT,RA,RB)  .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11)) -#define STBCIX(RS,RA,RB)  .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11)) - -  _GLOBAL(real_205_readb)  	mfmsr	r7  	ori	r0,r7,MSR_DR @@ -314,7 +309,7 @@ _GLOBAL(real_205_readb)  	mtmsrd	r0  	sync  	isync -	LBZCIX(r3,0,r3) +	LBZCIX(R3,R0,R3)  	isync  	mtmsrd	r7  	sync @@ -329,7 +324,7 @@ _GLOBAL(real_205_writeb)  	mtmsrd	r0  	sync  	isync -	STBCIX(r3,0,r4) +	STBCIX(R3,R0,R4)  	isync  	mtmsrd	r7  	sync diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8e78e93c818..2aa04f29e1d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -200,11 +200,6 @@ int pcibios_add_platform_entries(struct pci_dev *pdev)  	return device_create_file(&pdev->dev, &dev_attr_devspec);  } -char __devinit *pcibios_setup(char *str) -{ -	return str; -} -  /*   * Reads the interrupt pin to determine if interrupt is use by card.   * If the interrupt is used, then gets the interrupt line from the @@ -248,8 +243,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)  	} else {  		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",  			 oirq.size, oirq.specifier[0], oirq.specifier[1], -			 oirq.controller ? oirq.controller->full_name : -			 "<default>"); +			 of_node_full_name(oirq.controller));  		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,  					     oirq.size); @@ -1628,8 +1622,7 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)  	struct device_node *node = hose->dn;  	int mode; -	pr_debug("PCI: Scanning PHB %s\n", -		 node ? 
node->full_name : "<NO NAME>"); +	pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));  	/* Get some IO space for the new PHB */  	pcibios_setup_phb_io_space(hose); @@ -1637,6 +1630,11 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)  	/* Wire up PHB bus resources */  	pcibios_setup_phb_resources(hose, &resources); +	hose->busn.start = hose->first_busno; +	hose->busn.end	 = hose->last_busno; +	hose->busn.flags = IORESOURCE_BUS; +	pci_add_resource(&resources, &hose->busn); +  	/* Create an empty bus for the toplevel */  	bus = pci_create_root_bus(hose->parent, hose->first_busno,  				  hose->ops, hose, &resources); @@ -1646,7 +1644,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)  		pci_free_resource_list(&resources);  		return;  	} -	bus->secondary = hose->first_busno;  	hose->bus = bus;  	/* Get probe mode and perform scan */ @@ -1654,13 +1651,14 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)  	if (node && ppc_md.pci_probe_mode)  		mode = ppc_md.pci_probe_mode(bus);  	pr_debug("    probe mode: %d\n", mode); -	if (mode == PCI_PROBE_DEVTREE) { -		bus->subordinate = hose->last_busno; +	if (mode == PCI_PROBE_DEVTREE)  		of_scan_bus(node, bus); -	} -	if (mode == PCI_PROBE_NORMAL) -		hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); +	if (mode == PCI_PROBE_NORMAL) { +		pci_bus_update_busn_res_end(bus, 255); +		hose->last_busno = pci_scan_child_bus(bus); +		pci_bus_update_busn_res_end(bus, hose->last_busno); +	}  	/* Platform gets a chance to do some global fixups before  	 * we proceed to resource allocation diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 94a54f61d34..4ff190ff24a 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -236,7 +236,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,  	for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {  		bus = pci_bus_b(ln); -		if (in_bus >= bus->number && in_bus <= bus->subordinate) +		if (in_bus >= bus->number && in_bus <= bus->busn_res.end)  			break;  		bus = NULL;  	} diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 89dde171a6f..30378a19f65 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev);  /**   * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes - * @node: device tree node of bridge   * @dev: pci_dev structure for the bridge   *   * of_scan_bus() calls this routine for each PCI bridge that it finds, and @@ -240,7 +239,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)  	}  	bus->primary = dev->bus->number; -	bus->subordinate = busrange[1]; +	pci_bus_insert_busn_res(bus, busrange[0], busrange[1]);  	bus->bridge_ctl = 0;  	/* parse ranges property */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 4174b4b2324..2c0ee640563 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -709,7 +709,7 @@ static int __init rtas_flash_init(void)  	if (rtas_token("ibm,update-flash-64-and-reboot") ==  		       RTAS_UNKNOWN_SERVICE) { -		printk(KERN_ERR "rtas_flash: no firmware flash support\n"); +		pr_info("rtas_flash: no firmware flash support\n");  		return 1;  	} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index afd4f051f3f..bdc499c1787 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -720,6 
+720,33 @@ static int powerpc_debugfs_init(void)  arch_initcall(powerpc_debugfs_init);  #endif +#ifdef CONFIG_BOOKE_WDT +extern u32 booke_wdt_enabled; +extern u32 booke_wdt_period; + +/* Checks wdt=x and wdt_period=xx command-line option */ +notrace int __init early_parse_wdt(char *p) +{ +	if (p && strncmp(p, "0", 1) != 0) +		booke_wdt_enabled = 1; + +	return 0; +} +early_param("wdt", early_parse_wdt); + +int __init early_parse_wdt_period(char *p) +{ +	unsigned long ret; +	if (p) { +		if (!kstrtol(p, 0, &ret)) +			booke_wdt_period = ret; +	} + +	return 0; +} +early_param("wdt_period", early_parse_wdt_period); +#endif	/* CONFIG_BOOKE_WDT */ +  void ppc_printk_progress(char *s, unsigned short hex)  {  	pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ec8a53fa9e8..a8f54ecb091 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -149,30 +149,6 @@ notrace void __init machine_init(u64 dt_ptr)  		ppc_md.progress("id mach(): done", 0x200);  } -#ifdef CONFIG_BOOKE_WDT -extern u32 booke_wdt_enabled; -extern u32 booke_wdt_period; - -/* Checks wdt=x and wdt_period=xx command-line option */ -notrace int __init early_parse_wdt(char *p) -{ -	if (p && strncmp(p, "0", 1) != 0) -	       booke_wdt_enabled = 1; - -	return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period (char *p) -{ -	if (p) -		booke_wdt_period = simple_strtoul(p, NULL, 0); - -	return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif	/* CONFIG_BOOKE_WDT */ -  /* Checks "l2cr=xxxx" command-line option */  int __init ppc_setup_l2cr(char *str)  { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb34322de..0321007086f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -48,6 +48,7 @@  #ifdef CONFIG_PPC64  #include <asm/paca.h>  #endif +#include <asm/vdso.h>  #include <asm/debug.h>  #ifdef DEBUG @@ -570,8 +571,9 @@ void __devinit start_secondary(void *unused)  #ifdef CONFIG_PPC64  	if (system_state == SYSTEM_RUNNING)  		vdso_data->processorCount++; + +	vdso_getcpu_init();  #endif -	ipi_call_lock();  	notify_cpu_starting(cpu);  	set_cpu_online(cpu, true);  	/* Update sibling maps */ @@ -601,7 +603,6 @@ void __devinit start_secondary(void *unused)  		of_node_put(np);  	}  	of_node_put(l2_cache); -	ipi_call_unlock();  	local_irq_enable(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 9eb5b9b536a..b67db22e102 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -706,6 +706,34 @@ static void __init vdso_setup_syscall_map(void)  	}  } +#ifdef CONFIG_PPC64 +int __cpuinit vdso_getcpu_init(void) +{ +	unsigned long cpu, node, val; + +	/* +	 * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in +	 * the next 16 bits. The VDSO uses this to implement getcpu(). 
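Concretely, SPRG3 is readable from user mode, so once vdso_getcpu_init() has stored the packed value on each CPU, the __kernel_getcpu entry added below only has to read the SPR and split the fields. A sketch of the decode for the layout the comment above describes, low 16 bits CPU and next 16 bits node (hypothetical helper name):

	/* Unpack a value read from SPRG3/USPRG3 per the layout above. */
	static inline void sprg3_decode(unsigned long val,
					unsigned int *cpu, unsigned int *node)
	{
		if (cpu)
			*cpu = val & 0xffff;
		if (node)
			*node = (val >> 16) & 0xffff;
	}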
+	 */ +	cpu = get_cpu(); +	WARN_ON_ONCE(cpu > 0xffff); + +	node = cpu_to_node(cpu); +	WARN_ON_ONCE(node > 0xffff); + +	val = (cpu & 0xfff) | ((node & 0xffff) << 16); +	mtspr(SPRN_SPRG3, val); +#ifdef CONFIG_KVM_BOOK3S_HANDLER +	get_paca()->kvm_hstate.sprg3 = val; +#endif + +	put_cpu(); + +	return 0; +} +/* We need to call this before SMP init */ +early_initcall(vdso_getcpu_init); +#endif  static int __init vdso_init(void)  { diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 9a7946c4173..53e6c9b979e 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -1,7 +1,9 @@  # List of files in the vdso, has to be asm only for now -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso32-$(CONFIG_PPC64) = getcpu.o +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ +		$(obj-vdso32-y)  # Build rules diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S new file mode 100644 index 00000000000..47afd08c90f --- /dev/null +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + +	.text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) +  .cfi_startproc +	mfspr	r5,SPRN_USPRG3 +	cmpdi	cr0,r3,0 +	cmpdi	cr1,r4,0 +	clrlwi  r6,r5,16 +	rlwinm  r7,r5,16,31-15,31-0 +	beq	cr0,1f +	stw	r6,0(r3) +1:	beq	cr1,2f +	stw	r7,0(r4) +2:	crclr	cr0*4+so +	li	r3,0			/* always success */ +	blr +  .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 0546bcd49cd..43200ba2e57 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -147,6 +147,9 @@ VERSION  		__kernel_sync_dicache_p5;  		__kernel_sigtramp32;  		__kernel_sigtramp_rt32; +#ifdef CONFIG_PPC64 +		__kernel_getcpu; +#endif  	local: *;  	}; diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 8c500d8622e..effca9404b1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,6 +1,6 @@  # List of files in the vdso, has to be asm only for now -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o  # Build rules diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S new file mode 100644 index 00000000000..47afd08c90f --- /dev/null +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
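With __kernel_getcpu exported from both vdso32 (on 64-bit kernels) and vdso64, a C library can satisfy getcpu()/sched_getcpu() without a system call by binding to that symbol. A userspace sketch of the consumer side (standard glibc API; whether the call actually goes through the vDSO is up to the libc):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		/* With a vDSO getcpu bound in, this can avoid
		 * entering the kernel entirely. */
		int cpu = sched_getcpu();

		printf("running on cpu %d\n", cpu);
		return cpu < 0;
	}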
+ * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + +	.text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) +  .cfi_startproc +	mfspr	r5,SPRN_USPRG3 +	cmpdi	cr0,r3,0 +	cmpdi	cr1,r4,0 +	clrlwi  r6,r5,16 +	rlwinm  r7,r5,16,31-15,31-0 +	beq	cr0,1f +	stw	r6,0(r3) +1:	beq	cr1,2f +	stw	r7,0(r4) +2:	crclr	cr0*4+so +	li	r3,0			/* always success */ +	blr +  .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 0e615404e24..e6c1758f358 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -146,6 +146,7 @@ VERSION  		__kernel_sync_dicache;  		__kernel_sync_dicache_p5;  		__kernel_sigtramp_rt64; +		__kernel_getcpu;  	local: *;  	}; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cb87301ccd5..02b32216bbc 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -37,8 +37,6 @@  #include <asm/page.h>  #include <asm/hvcall.h> -static struct bus_type vio_bus_type; -  static struct vio_dev vio_bus_device  = { /* fake "parent" device */  	.name = "vio",  	.type = "", @@ -613,6 +611,7 @@ static u64 vio_dma_get_required_mask(struct device *dev)  struct dma_map_ops vio_dma_mapping_ops = {  	.alloc             = vio_dma_iommu_alloc_coherent,  	.free              = vio_dma_iommu_free_coherent, +	.mmap		   = dma_direct_mmap_coherent,  	.map_sg            = vio_dma_iommu_map_sg,  	.unmap_sg          = vio_dma_iommu_unmap_sg,  	.map_page          = vio_dma_iommu_map_page, @@ -625,7 +624,7 @@ struct dma_map_ops vio_dma_mapping_ops = {   * vio_cmo_set_dev_desired - Set desired entitlement for a device   *   * @viodev: struct vio_dev for device to alter - * @new_desired: new desired entitlement level in bytes + * @desired: new desired entitlement level in bytes   *   * For use by devices to request a change to their entitlement at runtime or   * through sysfs.  The desired entitlement level is changed and a balancing @@ -1262,7 +1261,7 @@ static int vio_bus_remove(struct device *dev)  /**   * vio_register_driver: - Register a new vio driver - * @drv:	The vio_driver structure to be registered. + * @viodrv:	The vio_driver structure to be registered.   */  int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,  			  const char *mod_name) @@ -1282,7 +1281,7 @@ EXPORT_SYMBOL(__vio_register_driver);  /**   * vio_unregister_driver - Remove registration of vio driver. - * @driver:	The vio_driver struct to be removed form registration + * @viodrv:	The vio_driver struct to be removed form registration   */  void vio_unregister_driver(struct vio_driver *viodrv)  { @@ -1296,8 +1295,7 @@ static void __devinit vio_dev_release(struct device *dev)  	struct iommu_table *tbl = get_iommu_table_base(dev);  	if (tbl) -		iommu_free_table(tbl, dev->of_node ? 
-			dev->of_node->full_name : dev_name(dev)); +		iommu_free_table(tbl, of_node_full_name(dev->of_node));  	of_node_put(dev->of_node);  	kfree(to_vio_dev(dev));  } @@ -1397,21 +1395,27 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)  	viodev->name = of_node->name;  	viodev->dev.of_node = of_node_get(of_node); -	if (firmware_has_feature(FW_FEATURE_CMO)) -		vio_cmo_set_dma_ops(viodev); -	else -		set_dma_ops(&viodev->dev, &dma_iommu_ops); -	set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));  	set_dev_node(&viodev->dev, of_node_to_nid(of_node));  	/* init generic 'struct device' fields: */  	viodev->dev.parent = &vio_bus_device.dev;  	viodev->dev.bus = &vio_bus_type;  	viodev->dev.release = vio_dev_release; -        /* needed to ensure proper operation of coherent allocations -         * later, in case driver doesn't set it explicitly */ -        dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); -        dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + +	if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) { +		if (firmware_has_feature(FW_FEATURE_CMO)) +			vio_cmo_set_dma_ops(viodev); +		else +			set_dma_ops(&viodev->dev, &dma_iommu_ops); + +		set_iommu_table_base(&viodev->dev, +				     vio_build_iommu_table(viodev)); + +		/* needed to ensure proper operation of coherent allocations +		 * later, in case driver doesn't set it explicitly */ +		dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); +		dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); +	}  	/* register with generic device framework */  	if (device_register(&viodev->dev)) { @@ -1491,12 +1495,18 @@ static int __init vio_bus_init(void)  	if (firmware_has_feature(FW_FEATURE_CMO))  		vio_cmo_bus_init(); +	return 0; +} +postcore_initcall(vio_bus_init); + +static int __init vio_device_init(void) +{  	vio_bus_scan_register_devices("vdevice");  	vio_bus_scan_register_devices("ibm,platform-facilities");  	return 0;  } -__initcall(vio_bus_init); +device_initcall(vio_device_init);  static ssize_t name_show(struct device *dev,  		struct device_attribute *attr, char *buf) @@ -1509,7 +1519,7 @@ static ssize_t devspec_show(struct device *dev,  {  	struct device_node *of_node = dev->of_node; -	return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); +	return sprintf(buf, "%s\n", of_node_full_name(of_node));  }  static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, @@ -1568,7 +1578,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)  	return 0;  } -static struct bus_type vio_bus_type = { +struct bus_type vio_bus_type = {  	.name = "vio",  	.dev_attrs = vio_dev_attrs,  	.uevent = vio_hotplug,  |
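A thread running through the DMA hunks in this series: the mmap logic moves out of a standalone powerpc-only dma_mmap_coherent export and into the .mmap slot of dma_direct_ops, dma_iommu_ops, swiotlb_dma_ops and vio_dma_mapping_ops, so drivers reach it through the common DMA API rather than an arch-specific call. A driver-side sketch of what that enables (struct mydev and its fields are hypothetical; assumes the generic dma_mmap_coherent() wrapper that dispatches to ops->mmap):

	/* Let userspace map a buffer previously obtained from
	 * dma_alloc_coherent(); the ops->mmap added above does the
	 * pfn lookup and remap_pfn_range() work. */
	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct mydev *mydev = file->private_data;

		return dma_mmap_coherent(mydev->dev, vma, mydev->cpu_addr,
					 mydev->dma_handle, mydev->size);
	}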