Diffstat (limited to 'arch/powerpc/kernel')
25 files changed, 1126 insertions, 499 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 58d0572de6f..77d831a1cc3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -34,9 +34,10 @@ obj-y				+= vdso32/  obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \  				   signal_64.o ptrace32.o \  				   paca.o nvram_64.o firmware.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o  obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o  obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o -obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o +obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o  obj-$(CONFIG_PPC64)		+= vdso64/  obj-$(CONFIG_ALTIVEC)		+= vecemu.o  obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o @@ -67,6 +68,7 @@ obj64-$(CONFIG_HIBERNATION)	+= swsusp_asm64.o  obj-$(CONFIG_MODULES)		+= module.o module_$(CONFIG_WORD_SIZE).o  obj-$(CONFIG_44x)		+= cpu_setup_44x.o  obj-$(CONFIG_FSL_BOOKE)		+= cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_BOOK3E_64)	+= dbell.o  extra-y				:= head_$(CONFIG_WORD_SIZE).o  extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 496cc5b3984..1c0607ddccc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -194,7 +194,6 @@ int main(void)  	DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));  	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));  	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); -	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));  	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER  	DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); @@ -342,6 +341,7 @@ int main(void)  	DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));  	DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));  	DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); +	DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));  	DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));  	DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));  	DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 87aa0f3c604..65e2b4e10f9 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1364,10 +1364,10 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.machine_check		= machine_check_4xx,  		.platform		= "ppc405",  	}, -	{	/* 405EX */ -		.pvr_mask		= 0xffff0004, -		.pvr_value		= 0x12910004, -		.cpu_name		= "405EX", +	{	/* 405EX Rev. A/B with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910007, +		.cpu_name		= "405EX Rev. A/B",  		.cpu_features		= CPU_FTRS_40X,  		.cpu_user_features	= PPC_FEATURE_32 |  			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, @@ -1377,10 +1377,114 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.machine_check		= machine_check_4xx,  		.platform		= "ppc405",  	}, -	{	/* 405EXr */ -		.pvr_mask		= 0xffff0004, +	{	/* 405EX Rev. C without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000d, +		.cpu_name		= "405EX Rev. 
C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. C with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000f, +		.cpu_name		= "405EX Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. D without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910003, +		.cpu_name		= "405EX Rev. D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. D with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910005, +		.cpu_name		= "405EX Rev. D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. A/B without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910001, +		.cpu_name		= "405EXr Rev. A/B", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. C without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910009, +		.cpu_name		= "405EXr Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. C with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000b, +		.cpu_name		= "405EXr Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. D without Security */ +		.pvr_mask		= 0xffff000f,  		.pvr_value		= 0x12910000, -		.cpu_name		= "405EXr", +		.cpu_name		= "405EXr Rev. D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. D with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910002, +		.cpu_name		= "405EXr Rev. 
D",  		.cpu_features		= CPU_FTRS_40X,  		.cpu_user_features	= PPC_FEATURE_32 |  			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 40f524643ba..8e05c16344e 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -128,9 +128,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,  	if (!csize)  		return 0; -	csize = min(csize, PAGE_SIZE); +	csize = min_t(size_t, csize, PAGE_SIZE); -	if (pfn < max_pfn) { +	if ((min_low_pfn < pfn) && (pfn < max_pfn)) {  		vaddr = __va(pfn << PAGE_SHIFT);  		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);  	} else { diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 1493734cd87..3307a52d797 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,32 +13,88 @@  #include <linux/kernel.h>  #include <linux/smp.h>  #include <linux/threads.h> +#include <linux/percpu.h>  #include <asm/dbell.h> +#include <asm/irq_regs.h>  #ifdef CONFIG_SMP -unsigned long dbell_smp_message[NR_CPUS]; +struct doorbell_cpu_info { +	unsigned long	messages;	/* current messages bits */ +	unsigned int	tag;		/* tag value */ +}; -void smp_dbell_message_pass(int target, int msg) +static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); + +void doorbell_setup_this_cpu(void) +{ +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + +	info->messages = 0; +	info->tag = mfspr(SPRN_PIR) & 0x3fff; +} + +void doorbell_message_pass(int target, int msg)  { +	struct doorbell_cpu_info *info;  	int i; -	if(target < NR_CPUS) { -		set_bit(msg, &dbell_smp_message[target]); -		ppc_msgsnd(PPC_DBELL, 0, target); +	if (target < NR_CPUS) { +		info = &per_cpu(doorbell_cpu_info, target); +		set_bit(msg, &info->messages); +		ppc_msgsnd(PPC_DBELL, 0, info->tag);  	} -	else if(target == MSG_ALL_BUT_SELF) { +	else if (target == MSG_ALL_BUT_SELF) {  		for_each_online_cpu(i) {  			if (i == smp_processor_id())  				continue; -			set_bit(msg, &dbell_smp_message[i]); -			ppc_msgsnd(PPC_DBELL, 0, i); +			info = &per_cpu(doorbell_cpu_info, i); +			set_bit(msg, &info->messages); +			ppc_msgsnd(PPC_DBELL, 0, info->tag);  		}  	}  	else { /* target == MSG_ALL */ -		for_each_online_cpu(i) -			set_bit(msg, &dbell_smp_message[i]); +		for_each_online_cpu(i) { +			info = &per_cpu(doorbell_cpu_info, i); +			set_bit(msg, &info->messages); +		}  		ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);  	}  } -#endif + +void doorbell_exception(struct pt_regs *regs) +{ +	struct pt_regs *old_regs = set_irq_regs(regs); +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); +	int msg; + +	/* Warning: regs can be NULL when called from irq enable */ + +	if (!info->messages || (num_online_cpus() < 2)) +		goto out; + +	for (msg = 0; msg < 4; msg++) +		if (test_and_clear_bit(msg, &info->messages)) +			smp_message_recv(msg); + +out: +	set_irq_regs(old_regs); +} + +void doorbell_check_self(void) +{ +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + +	if (!info->messages) +		return; + +	ppc_msgsnd(PPC_DBELL, 0, info->tag); +} + +#else /* CONFIG_SMP */ +void doorbell_exception(struct pt_regs *regs) +{ +	printk(KERN_WARNING "Received doorbell on non-smp system\n"); +} +#endif /* CONFIG_SMP */ + diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 24dcc0ecf24..5c43063d250 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -191,6 
+191,12 @@ exc_##n##_bad_stack:							    \  	sth	r1,PACA_TRAP_SAVE(r13);	/* store trap */		    \  	b	bad_stack_book3e;	/* bad stack error */ +/* WARNING: If you change the layout of this stub, make sure you chcek +	*   the debug exception handler which handles single stepping +	*   into exceptions from userspace, and the MM code in +	*   arch/powerpc/mm/tlb_nohash.c which patches the branch here +	*   and would need to be updated if that branch is moved +	*/  #define	EXCEPTION_STUB(loc, label)					\  	. = interrupt_base_book3e + loc;				\  	nop;	/* To make debug interrupts happy */			\ @@ -204,11 +210,30 @@ exc_##n##_bad_stack:							    \  	lis	r,TSR_FIS@h;						\  	mtspr	SPRN_TSR,r +/* Used by asynchronous interrupt that may happen in the idle loop. + * + * This check if the thread was in the idle loop, and if yes, returns + * to the caller rather than the PC. This is to avoid a race if + * interrupts happen before the wait instruction. + */ +#define CHECK_NAPPING()							\ +	clrrdi	r11,r1,THREAD_SHIFT;					\ +	ld	r10,TI_LOCAL_FLAGS(r11);				\ +	andi.	r9,r10,_TLF_NAPPING;					\ +	beq+	1f;							\ +	ld	r8,_LINK(r1);						\ +	rlwinm	r7,r10,0,~_TLF_NAPPING;					\ +	std	r8,_NIP(r1);						\ +	std	r7,TI_LOCAL_FLAGS(r11);					\ +1: + +  #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			\  	START_EXCEPTION(label);						\  	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	\  	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL)		\  	ack(r8);							\ +	CHECK_NAPPING();						\  	addi	r3,r1,STACK_FRAME_OVERHEAD;				\  	bl	hdlr;							\  	b	.ret_from_except_lite; @@ -246,11 +271,9 @@ interrupt_base_book3e:					/* fake trap */  	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */  	EXCEPTION_STUB(0x1c0, data_tlb_miss)  	EXCEPTION_STUB(0x1e0, instruction_tlb_miss) +	EXCEPTION_STUB(0x280, doorbell) +	EXCEPTION_STUB(0x2a0, doorbell_crit) -#if 0 -	EXCEPTION_STUB(0x280, processor_doorbell) -	EXCEPTION_STUB(0x220, processor_doorbell_crit) -#endif  	.globl interrupt_end_book3e  interrupt_end_book3e: @@ -259,6 +282,7 @@ interrupt_end_book3e:  	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)  //	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)  //	bl	special_reg_save_crit +//	CHECK_NAPPING();  //	addi	r3,r1,STACK_FRAME_OVERHEAD  //	bl	.critical_exception  //	b	ret_from_crit_except @@ -270,6 +294,7 @@ interrupt_end_book3e:  //	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)  //	bl	special_reg_save_mc  //	addi	r3,r1,STACK_FRAME_OVERHEAD +//	CHECK_NAPPING();  //	bl	.machine_check_exception  //	b	ret_from_mc_except  	b	. @@ -340,6 +365,7 @@ interrupt_end_book3e:  	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)  //	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)  //	bl	special_reg_save_crit +//	CHECK_NAPPING();  //	addi	r3,r1,STACK_FRAME_OVERHEAD  //	bl	.unknown_exception  //	b	ret_from_crit_except @@ -428,6 +454,20 @@ interrupt_end_book3e:  kernel_dbg_exc:  	b	.	/* NYI */ +/* Doorbell interrupt */ +	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) + +/* Doorbell critical Interrupt */ +	START_EXCEPTION(doorbell_crit); +	CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) +//	EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) +//	bl	special_reg_save_crit +//	CHECK_NAPPING(); +//	addi	r3,r1,STACK_FRAME_OVERHEAD +//	bl	.doorbell_critical_exception +//	b	ret_from_crit_except +	b	. 
+  /*   * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -563,6 +603,8 @@ BAD_STACK_TRAMPOLINE(0xd00)  BAD_STACK_TRAMPOLINE(0xe00)  BAD_STACK_TRAMPOLINE(0xf00)  BAD_STACK_TRAMPOLINE(0xf20) +BAD_STACK_TRAMPOLINE(0x2070) +BAD_STACK_TRAMPOLINE(0x2080)  	.globl	bad_stack_book3e  bad_stack_book3e: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3e423fbad6b..f53029a0155 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -828,6 +828,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)  /* We have a data breakpoint exception - handle it */  handle_dabr_fault: +	bl	.save_nvgprs  	ld      r4,_DAR(r1)  	ld      r5,_DSISR(r1)  	addi    r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..5ecd0401cdb --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -0,0 +1,364 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. Derived from + * "arch/x86/kernel/hw_breakpoint.c" + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2010 IBM Corporation + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + * + */ + +#include <linux/hw_breakpoint.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/sstep.h> +#include <asm/uaccess.h> + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for every cpu + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); + +/* + * Returns total number of data or instruction breakpoints available. + */ +int hw_breakpoint_slots(int type) +{ +	if (type == TYPE_DATA) +		return HBP_NUM; +	return 0;		/* no instruction breakpoints available */ +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); +	struct perf_event **slot = &__get_cpu_var(bp_per_reg); + +	*slot = bp; + +	/* +	 * Do not install DABR values if the instruction must be single-stepped. +	 * If so, DABR will be populated in single_step_dabr_instruction(). +	 */ +	if (current->thread.last_hit_ubp != bp) +		set_dabr(info->address | info->type | DABR_TRANSLATION); + +	return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. 
+ * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ +	struct perf_event **slot = &__get_cpu_var(bp_per_reg); + +	if (*slot != bp) { +		WARN_ONCE(1, "Can't find the breakpoint"); +		return; +	} + +	*slot = NULL; +	set_dabr(0); +} + +/* + * Perform cleanup of arch-specific counters during unregistration + * of the perf-event + */ +void arch_unregister_hw_breakpoint(struct perf_event *bp) +{ +	/* +	 * If the breakpoint is unregistered between a hw_breakpoint_handler() +	 * and the single_step_dabr_instruction(), then cleanup the breakpoint +	 * restoration variables to prevent dangling pointers. +	 */ +	if (bp->ctx->task) +		bp->ctx->task->thread.last_hit_ubp = NULL; +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	return is_kernel_addr(info->address); +} + +int arch_bp_generic_fields(int type, int *gen_bp_type) +{ +	switch (type) { +	case DABR_DATA_READ: +		*gen_bp_type = HW_BREAKPOINT_R; +		break; +	case DABR_DATA_WRITE: +		*gen_bp_type = HW_BREAKPOINT_W; +		break; +	case (DABR_DATA_WRITE | DABR_DATA_READ): +		*gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R); +		break; +	default: +		return -EINVAL; +	} +	return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ +	int ret = -EINVAL; +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	if (!bp) +		return ret; + +	switch (bp->attr.bp_type) { +	case HW_BREAKPOINT_R: +		info->type = DABR_DATA_READ; +		break; +	case HW_BREAKPOINT_W: +		info->type = DABR_DATA_WRITE; +		break; +	case HW_BREAKPOINT_R | HW_BREAKPOINT_W: +		info->type = (DABR_DATA_READ | DABR_DATA_WRITE); +		break; +	default: +		return ret; +	} + +	info->address = bp->attr.bp_addr; +	info->len = bp->attr.bp_len; + +	/* +	 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) +	 * and breakpoint addresses are aligned to nearest double-word +	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the +	 * 'symbolsize' should satisfy the check below. +	 */ +	if (info->len > +	    (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN))) +		return -EINVAL; +	return 0; +} + +/* + * Restores the breakpoint on the debug registers. + * Invoke this function if it is known that the execution context is + * about to change to cause loss of MSR_SE settings. + */ +void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) +{ +	struct arch_hw_breakpoint *info; + +	if (likely(!tsk->thread.last_hit_ubp)) +		return; + +	info = counter_arch_bp(tsk->thread.last_hit_ubp); +	regs->msr &= ~MSR_SE; +	set_dabr(info->address | info->type | DABR_TRANSLATION); +	tsk->thread.last_hit_ubp = NULL; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ +	int rc = NOTIFY_STOP; +	struct perf_event *bp; +	struct pt_regs *regs = args->regs; +	int stepped = 1; +	struct arch_hw_breakpoint *info; +	unsigned int instr; +	unsigned long dar = regs->dar; + +	/* Disable breakpoints during exception handling */ +	set_dabr(0); + +	/* +	 * The counter may be concurrently released but that can only +	 * occur from a call_rcu() path. 
We can then safely fetch +	 * the breakpoint, use its callback, touch its counter +	 * while we are in an rcu_read_lock() path. +	 */ +	rcu_read_lock(); + +	bp = __get_cpu_var(bp_per_reg); +	if (!bp) +		goto out; +	info = counter_arch_bp(bp); + +	/* +	 * Return early after invoking user-callback function without restoring +	 * DABR if the breakpoint is from ptrace which always operates in +	 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal +	 * generated in do_dabr(). +	 */ +	if (bp->overflow_handler == ptrace_triggered) { +		perf_bp_event(bp, regs); +		rc = NOTIFY_DONE; +		goto out; +	} + +	/* +	 * Verify if dar lies within the address range occupied by the symbol +	 * being watched to filter extraneous exceptions.  If it doesn't, +	 * we still need to single-step the instruction, but we don't +	 * generate an event. +	 */ +	info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && +			(dar - bp->attr.bp_addr < bp->attr.bp_len)); + +	/* Do not emulate user-space instructions, instead single-step them */ +	if (user_mode(regs)) { +		bp->ctx->task->thread.last_hit_ubp = bp; +		regs->msr |= MSR_SE; +		goto out; +	} + +	stepped = 0; +	instr = 0; +	if (!__get_user_inatomic(instr, (unsigned int *) regs->nip)) +		stepped = emulate_step(regs, instr); + +	/* +	 * emulate_step() could not execute it. We've failed in reliably +	 * handling the hw-breakpoint. Unregister it and throw a warning +	 * message to let the user know about it. +	 */ +	if (!stepped) { +		WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " +			"0x%lx will be disabled.", info->address); +		perf_event_disable(bp); +		goto out; +	} +	/* +	 * As a policy, the callback is invoked in a 'trigger-after-execute' +	 * fashion +	 */ +	if (!info->extraneous_interrupt) +		perf_bp_event(bp, regs); + +	set_dabr(info->address | info->type | DABR_TRANSLATION); +out: +	rcu_read_unlock(); +	return rc; +} + +/* + * Handle single-step exceptions following a DABR hit. + */ +int __kprobes single_step_dabr_instruction(struct die_args *args) +{ +	struct pt_regs *regs = args->regs; +	struct perf_event *bp = NULL; +	struct arch_hw_breakpoint *bp_info; + +	bp = current->thread.last_hit_ubp; +	/* +	 * Check if we are single-stepping as a result of a +	 * previous HW Breakpoint exception +	 */ +	if (!bp) +		return NOTIFY_DONE; + +	bp_info = counter_arch_bp(bp); + +	/* +	 * We shall invoke the user-defined callback function in the single +	 * stepping handler to confirm to 'trigger-after-execute' semantics +	 */ +	if (!bp_info->extraneous_interrupt) +		perf_bp_event(bp, regs); + +	set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION); +	current->thread.last_hit_ubp = NULL; + +	/* +	 * If the process was being single-stepped by ptrace, let the +	 * other single-step actions occur (e.g. generate SIGTRAP). +	 */ +	if (test_thread_flag(TIF_SINGLESTEP)) +		return NOTIFY_DONE; + +	return NOTIFY_STOP; +} + +/* + * Handle debug exception notifications. 
+ */ +int __kprobes hw_breakpoint_exceptions_notify( +		struct notifier_block *unused, unsigned long val, void *data) +{ +	int ret = NOTIFY_DONE; + +	switch (val) { +	case DIE_DABR_MATCH: +		ret = hw_breakpoint_handler(data); +		break; +	case DIE_SSTEP: +		ret = single_step_dabr_instruction(data); +		break; +	} + +	return ret; +} + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ +	struct thread_struct *t = &tsk->thread; + +	unregister_hw_breakpoint(t->ptrace_bps[0]); +	t->ptrace_bps[0] = NULL; +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +	/* TODO */ +} diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S new file mode 100644 index 00000000000..16c002d6bdf --- /dev/null +++ b/arch/powerpc/kernel/idle_book3e.S @@ -0,0 +1,86 @@ +/* + * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * Generic idle routine for Book3E processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +/* 64-bit version only for now */ +#ifdef CONFIG_PPC64 + +_GLOBAL(book3e_idle) +	/* Save LR for later */ +	mflr	r0 +	std	r0,16(r1) + +	/* Hard disable interrupts */ +	wrteei	0 + +	/* Now check if an interrupt came in while we were soft disabled +	 * since we may otherwise lose it (doorbells etc...). We know +	 * that since PACAHARDIRQEN will have been cleared in that case. +	 */ +	lbz	r3,PACAHARDIRQEN(r13) +	cmpwi	cr0,r3,0 +	beqlr + +	/* Now we are going to mark ourselves as soft and hard enables in +	 * order to be able to take interrupts while asleep. We inform lockdep +	 * of that. We don't actually turn interrupts on just yet tho. 
+	 */ +#ifdef CONFIG_TRACE_IRQFLAGS +	stdu    r1,-128(r1) +	bl	.trace_hardirqs_on +#endif +	li	r0,1 +	stb	r0,PACASOFTIRQEN(r13) +	stb	r0,PACAHARDIRQEN(r13) +	 +	/* Interrupts will make use return to LR, so get something we want +	 * in there +	 */ +	bl	1f + +	/* Hard disable interrupts again */ +	wrteei	0 + +	/* Mark them off again in the PACA as well */ +	li	r0,0 +	stb	r0,PACASOFTIRQEN(r13) +	stb	r0,PACAHARDIRQEN(r13) + +	/* Tell lockdep about it */ +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	.trace_hardirqs_off +	addi    r1,r1,128 +#endif +	ld	r0,16(r1) +	mtlr	r0 +	blr + +1:	/* Let's set the _TLF_NAPPING flag so interrupts make us return +	 * to the right spot +	*/ +	clrrdi	r11,r1,THREAD_SHIFT +	ld	r10,TI_LOCAL_FLAGS(r11) +	ori	r10,r10,_TLF_NAPPING +	std	r10,TI_LOCAL_FLAGS(r11) + +	/* We can now re-enable hard interrupts and go to sleep */ +	wrteei	1 +1:	PPC_WAIT(0) +	b	1b + +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 77be3d058a6..8f96d319890 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -64,6 +64,8 @@  #include <asm/ptrace.h>  #include <asm/machdep.h>  #include <asm/udbg.h> +#include <asm/dbell.h> +  #ifdef CONFIG_PPC64  #include <asm/paca.h>  #include <asm/firmware.h> @@ -153,14 +155,28 @@ notrace void raw_local_irq_restore(unsigned long en)  	if (get_hard_enabled())  		return; +#if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) +	/* Check for pending doorbell interrupts and resend to ourself */ +	doorbell_check_self(); +#endif +  	/*  	 * Need to hard-enable interrupts here.  Since currently disabled,  	 * no need to take further asm precautions against preemption; but  	 * use local_paca instead of get_paca() to avoid preemption checking.  	 */  	local_paca->hard_enabled = en; + +#ifndef CONFIG_BOOKE +	/* On server, re-trigger the decrementer if it went negative since +	 * some processors only trigger on edge transitions of the sign bit. +	 * +	 * BookE has a level sensitive decrementer (latches in TSR) so we +	 * don't need that +	 */  	if ((int)mfspr(SPRN_DEC) < 0)  		mtspr(SPRN_DEC, 1); +#endif /* CONFIG_BOOKE */  	/*  	 * Force the delivery of pending soft-disabled interrupts on PS3. diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 89f005116aa..dd6c141f166 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -45,6 +45,18 @@ void machine_kexec_cleanup(struct kimage *image)  		ppc_md.machine_kexec_cleanup(image);  } +void arch_crash_save_vmcoreinfo(void) +{ + +#ifdef CONFIG_NEED_MULTIPLE_NODES +	VMCOREINFO_SYMBOL(node_data); +	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifndef CONFIG_NEED_MULTIPLE_NODES +	VMCOREINFO_SYMBOL(contig_page_data); +#endif +} +  /*   * Do not allocate memory (or fail in any way) in machine_kexec().   * We are past the point of no return, committed to rebooting now. @@ -144,24 +156,24 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)  }  /* Values we need to export to the second kernel via the device tree. 
*/ -static unsigned long kernel_end; -static unsigned long crashk_size; +static phys_addr_t kernel_end; +static phys_addr_t crashk_size;  static struct property kernel_end_prop = {  	.name = "linux,kernel-end", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &kernel_end,  };  static struct property crashk_base_prop = {  	.name = "linux,crashkernel-base", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &crashk_res.start,  };  static struct property crashk_size_prop = {  	.name = "linux,crashkernel-size", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &crashk_size,  }; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index ed31a29c4ff..583af70c4b1 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -15,6 +15,8 @@  #include <linux/thread_info.h>  #include <linux/init_task.h>  #include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/cpu.h>  #include <asm/page.h>  #include <asm/current.h> @@ -25,6 +27,7 @@  #include <asm/sections.h>	/* _end */  #include <asm/prom.h>  #include <asm/smp.h> +#include <asm/hw_breakpoint.h>  int default_machine_kexec_prepare(struct kimage *image)  { @@ -165,6 +168,7 @@ static void kexec_smp_down(void *arg)  	while(kexec_all_irq_disabled == 0)  		cpu_relax();  	mb(); /* make sure all irqs are disabled before this */ +	hw_breakpoint_disable();  	/*  	 * Now every CPU has IRQs off, we can clear out any pending  	 * IPIs and be sure that no more will come in after this. @@ -180,8 +184,22 @@ static void kexec_prepare_cpus_wait(int wait_state)  {  	int my_cpu, i, notified=-1; +	hw_breakpoint_disable();  	my_cpu = get_cpu(); -	/* Make sure each CPU has atleast made it to the state we need */ +	/* Make sure each CPU has at least made it to the state we need. +	 * +	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs +	 * are correctly onlined.  If somehow we start a CPU on boot with RTAS +	 * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in +	 * time, the boot CPU will timeout.  If it does eventually execute +	 * stuff, the secondary will start up (paca[].cpu_start was written) and +	 * get into a peculiar state.  If the platform supports +	 * smp_ops->take_timebase(), the secondary CPU will probably be spinning +	 * in there.  If not (i.e. pseries), the secondary will continue on and +	 * try to online itself/idle/etc. If it survives that, we need to find +	 * these possible-but-not-online-but-should-be CPUs and chaperone them +	 * into kexec_smp_wait(). +	 */  	for_each_online_cpu(i) {  		if (i == my_cpu)  			continue; @@ -189,9 +207,9 @@ static void kexec_prepare_cpus_wait(int wait_state)  		while (paca[i].kexec_state < wait_state) {  			barrier();  			if (i != notified) { -				printk( "kexec: waiting for cpu %d (physical" -						" %d) to enter %i state\n", -					i, paca[i].hw_cpu_id, wait_state); +				printk(KERN_INFO "kexec: waiting for cpu %d " +				       "(physical %d) to enter %i state\n", +				       i, paca[i].hw_cpu_id, wait_state);  				notified = i;  			}  		} @@ -199,9 +217,32 @@ static void kexec_prepare_cpus_wait(int wait_state)  	mb();  } -static void kexec_prepare_cpus(void) +/* + * We need to make sure each present CPU is online.  The next kernel will scan + * the device tree and assume primary threads are online and query secondary + * threads via RTAS to online them if required.  
If we don't online primary + * threads, they will be stuck.  However, we also online secondary threads as we + * may be using 'cede offline'.  In this case RTAS doesn't see the secondary + * threads as offline -- and again, these CPUs will be stuck. + * + * So, we online all CPUs that should be running, including secondary threads. + */ +static void wake_offline_cpus(void)  { +	int cpu = 0; +	for_each_present_cpu(cpu) { +		if (!cpu_online(cpu)) { +			printk(KERN_INFO "kexec: Waking offline cpu %d.\n", +			       cpu); +			cpu_up(cpu); +		} +	} +} + +static void kexec_prepare_cpus(void) +{ +	wake_offline_cpus();  	smp_call_function(kexec_smp_down, NULL, /* wait */0);  	local_irq_disable();  	mb(); /* make sure IRQs are disabled before we say they are */ @@ -215,7 +256,10 @@ static void kexec_prepare_cpus(void)  	if (ppc_md.kexec_cpu_down)  		ppc_md.kexec_cpu_down(0, 0); -	/* Before removing MMU mapings make sure all CPUs have entered real mode */ +	/* +	 * Before removing MMU mappings make sure all CPUs have entered real +	 * mode: +	 */  	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);  	put_cpu(); @@ -257,6 +301,12 @@ static void kexec_prepare_cpus(void)  static union thread_union kexec_stack __init_task_data =  	{ }; +/* + * For similar reasons to the stack above, the kexecing CPU needs to be on a + * static PACA; we switch to kexec_paca. + */ +struct paca_struct kexec_paca; +  /* Our assembly helper, in kexec_stub.S */  extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,  					void *image, void *control, @@ -278,12 +328,28 @@ void default_machine_kexec(struct kimage *image)  	if (crashing_cpu == -1)  		kexec_prepare_cpus(); +	pr_debug("kexec: Starting switchover sequence.\n"); +  	/* switch to a staticly allocated stack.  Based on irq stack code.  	 * XXX: the task struct will likely be invalid once we do the copy!  	 */  	kexec_stack.thread_info.task = current_thread_info()->task;  	kexec_stack.thread_info.flags = 0; +	/* We need a static PACA, too; copy this CPU's PACA over and switch to +	 * it.  Also poison per_cpu_offset to catch anyone using non-static +	 * data. +	 */ +	memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); +	kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; +	paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - +		kexec_paca.paca_index; +	setup_paca(&kexec_paca); + +	/* XXX: If anyone does 'dynamic lppacas' this will also need to be +	 * switched to a static version! +	 */ +  	/* Some things are best done in assembly.  Finding globals with  	 * a toc is easier in C, so pass in what we can.  	 
*/ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f..d0a26f1770f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -105,6 +105,16 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)  #endif /* CONFIG_PPC_STD_MMU_64 */  } +/* Put the paca pointer into r13 and SPRG_PACA */ +void setup_paca(struct paca_struct *new_paca) +{ +	local_paca = new_paca; +	mtspr(SPRN_SPRG_PACA, local_paca); +#ifdef CONFIG_PPC_BOOK3E +	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#endif +} +  static int __initdata paca_size;  void __init allocate_pacas(void) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 773424df828..551f6713ff4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,6 +37,7 @@  #include <linux/kernel_stat.h>  #include <linux/personality.h>  #include <linux/random.h> +#include <linux/hw_breakpoint.h>  #include <asm/pgtable.h>  #include <asm/uaccess.h> @@ -462,14 +463,42 @@ struct task_struct *__switch_to(struct task_struct *prev,  #ifdef CONFIG_PPC_ADV_DEBUG_REGS  	switch_booke_debug_regs(&new->thread);  #else +/* + * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would + * schedule DABR + */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT  	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))  		set_dabr(new->thread.dabr); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */  #endif  	new_thread = &new->thread;  	old_thread = ¤t->thread; +#if defined(CONFIG_PPC_BOOK3E_64) +	/* XXX Current Book3E code doesn't deal with kernel side DBCR0, +	 * we always hold the user values, so we set it now. +	 * +	 * However, we ensure the kernel MSR:DE is appropriately cleared too +	 * to avoid spurrious single step exceptions in the kernel. 
+	 * +	 * This will have to change to merge with the ppc32 code at some point, +	 * but I don't like much what ppc32 is doing today so there's some +	 * thinking needed there +	 */ +	if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) { +		u32 dbcr0; + +		mtmsr(mfmsr() & ~MSR_DE); +		isync(); +		dbcr0 = mfspr(SPRN_DBCR0); +		dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0; +		mtspr(SPRN_DBCR0, dbcr0); +	} +#endif /* CONFIG_PPC64_BOOK3E */ +  #ifdef CONFIG_PPC64  	/*  	 * Collect processor utilization data per process @@ -642,7 +671,11 @@ void flush_thread(void)  {  	discard_lazy_cpu_state(); +#ifdef CONFIG_HAVE_HW_BREAKPOINTS +	flush_ptrace_hw_breakpoint(current); +#else /* CONFIG_HAVE_HW_BREAKPOINTS */  	set_debug_reg_defaults(¤t->thread); +#endif /* CONFIG_HAVE_HW_BREAKPOINTS */  }  void @@ -660,6 +693,9 @@ void prepare_to_copy(struct task_struct *tsk)  	flush_altivec_to_thread(current);  	flush_vsx_to_thread(current);  	flush_spe_to_thread(current); +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	flush_ptrace_hw_breakpoint(tsk); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */  }  /* diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 3b6f8ae9b8c..941ff4dbc56 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -311,6 +311,24 @@ static void __init prom_print_hex(unsigned long val)  	call_prom("write", 3, 1, _prom->stdout, buf, nibbles);  } +/* max number of decimal digits in an unsigned long */ +#define UL_DIGITS 21 +static void __init prom_print_dec(unsigned long val) +{ +	int i, size; +	char buf[UL_DIGITS+1]; +	struct prom_t *_prom = &RELOC(prom); + +	for (i = UL_DIGITS-1; i >= 0;  i--) { +		buf[i] = (val % 10) + '0'; +		val = val/10; +		if (val == 0) +			break; +	} +	/* shift stuff down */ +	size = UL_DIGITS - i; +	call_prom("write", 3, 1, _prom->stdout, buf+i, size); +}  static void __init prom_printf(const char *format, ...)  { @@ -350,6 +368,14 @@ static void __init prom_printf(const char *format, ...)  			v = va_arg(args, unsigned long);  			prom_print_hex(v);  			break; +		case 'l': +			++q; +			if (*q == 'u') { /* '%lu' */ +				++q; +				v = va_arg(args, unsigned long); +				prom_print_dec(v); +			} +			break;  		}  	}  } @@ -835,11 +861,11 @@ static int __init prom_count_smt_threads(void)  		if (plen == PROM_ERROR)  			break;  		plen >>= 2; -		prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen); +		prom_debug("Found %lu smt threads per core\n", (unsigned long)plen);  		/* Sanity check */  		if (plen < 1 || plen > 64) { -			prom_printf("Threads per core 0x%x out of bounds, assuming 1\n", +			prom_printf("Threads per core %lu out of bounds, assuming 1\n",  				    (unsigned long)plen);  			return 1;  		} @@ -869,12 +895,12 @@ static void __init prom_send_capabilities(void)  		cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]);  		if (*cores != NR_CPUS) {  			prom_printf("WARNING ! 
" -				    "ibm_architecture_vec structure inconsistent: 0x%x !\n", +				    "ibm_architecture_vec structure inconsistent: %lu!\n",  				    *cores);  		} else {  			*cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); -			prom_printf("Max number of cores passed to firmware: 0x%x\n", -				    (unsigned long)*cores); +			prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", +				    *cores, NR_CPUS);  		}  		/* try calling the ibm,client-architecture-support method */ @@ -1482,7 +1508,7 @@ static void __init prom_hold_cpus(void)  		reg = -1;  		prom_getprop(node, "reg", ®, sizeof(reg)); -		prom_debug("cpu hw idx   = 0x%x\n", reg); +		prom_debug("cpu hw idx   = %lu\n", reg);  		/* Init the acknowledge var which will be reset by  		 * the secondary cpu when it awakens from its OF @@ -1492,7 +1518,7 @@ static void __init prom_hold_cpus(void)  		if (reg != _prom->cpu) {  			/* Primary Thread of non-boot cpu */ -			prom_printf("starting cpu hw idx %x... ", reg); +			prom_printf("starting cpu hw idx %lu... ", reg);  			call_prom("start-cpu", 3, 0, node,  				  secondary_hold, reg); @@ -1507,7 +1533,7 @@ static void __init prom_hold_cpus(void)  		}  #ifdef CONFIG_SMP  		else -			prom_printf("boot cpu hw idx %x\n", reg); +			prom_printf("boot cpu hw idx %lu\n", reg);  #endif /* CONFIG_SMP */  	} @@ -2420,7 +2446,7 @@ static void __init prom_find_boot_cpu(void)  	prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));  	_prom->cpu = getprop_rval; -	prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); +	prom_debug("Booting CPU hw index = %lu\n", _prom->cpu);  }  static void __init prom_check_initrd(unsigned long r3, unsigned long r4) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 7a0c0199ea2..11f3cd9c832 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -32,6 +32,8 @@  #ifdef CONFIG_PPC32  #include <linux/module.h>  #endif +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h>  #include <asm/uaccess.h>  #include <asm/page.h> @@ -866,9 +868,34 @@ void user_disable_single_step(struct task_struct *task)  	clear_tsk_thread_flag(task, TIF_SINGLESTEP);  } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void ptrace_triggered(struct perf_event *bp, int nmi, +		      struct perf_sample_data *data, struct pt_regs *regs) +{ +	struct perf_event_attr attr; + +	/* +	 * Disable the breakpoint request here since ptrace has defined a +	 * one-shot behaviour for breakpoint exceptions in PPC64. +	 * The SIGTRAP signal is generated automatically for us in do_dabr(). +	 * We don't have to do anything about that here +	 */ +	attr = bp->attr; +	attr.disabled = true; +	modify_user_hw_breakpoint(bp, &attr); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +  int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  			       unsigned long data)  { +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	int ret; +	struct thread_struct *thread = &(task->thread); +	struct perf_event *bp; +	struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +  	/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).  	 *  For embedded processors we support one DAC and no IAC's at the  	 *  moment. 
@@ -896,6 +923,43 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  	/* Ensure breakpoint translation bit is set */  	if (data && !(data & DABR_TRANSLATION))  		return -EIO; +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	bp = thread->ptrace_bps[0]; +	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { +		if (bp) { +			unregister_hw_breakpoint(bp); +			thread->ptrace_bps[0] = NULL; +		} +		return 0; +	} +	if (bp) { +		attr = bp->attr; +		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; +		arch_bp_generic_fields(data & +					(DABR_DATA_WRITE | DABR_DATA_READ), +							&attr.bp_type); +		ret =  modify_user_hw_breakpoint(bp, &attr); +		if (ret) +			return ret; +		thread->ptrace_bps[0] = bp; +		thread->dabr = data; +		return 0; +	} + +	/* Create a new breakpoint request if one doesn't exist already */ +	hw_breakpoint_init(&attr); +	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; +	arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ), +								&attr.bp_type); + +	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, +							ptrace_triggered, task); +	if (IS_ERR(bp)) { +		thread->ptrace_bps[0] = NULL; +		return PTR_ERR(bp); +	} + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */  	/* Move contents to the DABR register */  	task->thread.dabr = data; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee76..41048de3c6c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -47,14 +47,6 @@ struct rtas_t rtas = {  };  EXPORT_SYMBOL(rtas); -struct rtas_suspend_me_data { -	atomic_t working; /* number of cpus accessing this struct */ -	atomic_t done; -	int token; /* ibm,suspend-me */ -	int error; -	struct completion *complete; /* wait on this until working == 0 */ -}; -  DEFINE_SPINLOCK(rtas_data_buf_lock);  EXPORT_SYMBOL(rtas_data_buf_lock); @@ -714,14 +706,53 @@ void rtas_os_term(char *str)  static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;  #ifdef CONFIG_PPC_PSERIES -static void rtas_percpu_suspend_me(void *info) +static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) +{ +	u16 slb_size = mmu_slb_size; +	int rc = H_MULTI_THREADS_ACTIVE; +	int cpu; + +	slb_set_size(SLB_MIN_SIZE); +	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); + +	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && +	       !atomic_read(&data->error)) +		rc = rtas_call(data->token, 0, 1, NULL); + +	if (rc || atomic_read(&data->error)) { +		printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); +		slb_set_size(slb_size); +	} + +	if (atomic_read(&data->error)) +		rc = atomic_read(&data->error); + +	atomic_set(&data->error, rc); + +	if (wake_when_done) { +		atomic_set(&data->done, 1); + +		for_each_online_cpu(cpu) +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +	} + +	if (atomic_dec_return(&data->working) == 0) +		complete(data->complete); + +	return rc; +} + +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) +{ +	atomic_inc(&data->working); +	return __rtas_suspend_last_cpu(data, 0); +} + +static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)  {  	long rc = H_SUCCESS;  	unsigned long msr_save; -	u16 slb_size = mmu_slb_size;  	int cpu; -	struct rtas_suspend_me_data *data = -		(struct rtas_suspend_me_data *)info;  	atomic_inc(&data->working); @@ -729,7 +760,7 @@ static void rtas_percpu_suspend_me(void *info)  	msr_save = mfmsr();  	mtmsr(msr_save & ~(MSR_EE)); -	while (rc == H_SUCCESS && !atomic_read(&data->done)) +	
while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))  		rc = plpar_hcall_norets(H_JOIN);  	mtmsr(msr_save); @@ -741,33 +772,37 @@ static void rtas_percpu_suspend_me(void *info)  		/* All other cpus are in H_JOIN, this cpu does  		 * the suspend.  		 */ -		slb_set_size(SLB_MIN_SIZE); -		printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", -		       smp_processor_id()); -		data->error = rtas_call(data->token, 0, 1, NULL); - -		if (data->error) { -			printk(KERN_DEBUG "ibm,suspend-me returned %d\n", -			       data->error); -			slb_set_size(slb_size); -		} +		return __rtas_suspend_last_cpu(data, wake_when_done);  	} else {  		printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",  		       smp_processor_id(), rc); -		data->error = rc; +		atomic_set(&data->error, rc);  	} -	atomic_set(&data->done, 1); +	if (wake_when_done) { +		atomic_set(&data->done, 1); -	/* This cpu did the suspend or got an error; in either case, -	 * we need to prod all other other cpus out of join state. -	 * Extra prods are harmless. -	 */ -	for_each_online_cpu(cpu) -		plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +		/* This cpu did the suspend or got an error; in either case, +		 * we need to prod all other other cpus out of join state. +		 * Extra prods are harmless. +		 */ +		for_each_online_cpu(cpu) +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +	}  out:  	if (atomic_dec_return(&data->working) == 0)  		complete(data->complete); +	return rc; +} + +int rtas_suspend_cpu(struct rtas_suspend_me_data *data) +{ +	return __rtas_suspend_cpu(data, 0); +} + +static void rtas_percpu_suspend_me(void *info) +{ +	__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);  }  static int rtas_ibm_suspend_me(struct rtas_args *args) @@ -802,22 +837,22 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)  	atomic_set(&data.working, 0);  	atomic_set(&data.done, 0); +	atomic_set(&data.error, 0);  	data.token = rtas_token("ibm,suspend-me"); -	data.error = 0;  	data.complete = &done;  	/* Call function on all CPUs.  
One of us will make the  	 * rtas call  	 */  	if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) -		data.error = -EINVAL; +		atomic_set(&data.error, -EINVAL);  	wait_for_completion(&done); -	if (data.error != 0) +	if (atomic_read(&data.error) != 0)  		printk(KERN_ERR "Error doing global join\n"); -	return data.error; +	return atomic_read(&data.error);  }  #else /* CONFIG_PPC_PSERIES */  static int rtas_ibm_suspend_me(struct rtas_args *args) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b7e6c7e193a..70decd8068c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -94,6 +94,10 @@ struct screen_info screen_info = {  	.orig_video_points = 16  }; +/* Variables required to store legacy IO irq routing */ +int of_i8042_kbd_irq; +int of_i8042_aux_irq; +  #ifdef __DO_IRQ_CANON  /* XXX should go elsewhere eventually */  int ppc_do_canonicalize_irqs; @@ -575,6 +579,15 @@ int check_legacy_ioport(unsigned long base_port)  			np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");  		if (np) {  			parent = of_get_parent(np); + +			of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0); +			if (!of_i8042_kbd_irq) +				of_i8042_kbd_irq = 1; + +			of_i8042_aux_irq = irq_of_parse_and_map(parent, 1); +			if (!of_i8042_aux_irq) +				of_i8042_aux_irq = 12; +  			of_node_put(np);  			np = parent;  			break; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f..1bee4b68fa4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -142,16 +142,6 @@ early_param("smt-enabled", early_smt_enabled);  #define check_smt_enabled()  #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG_PACA */ -static void __init setup_paca(struct paca_struct *new_paca) -{ -	local_paca = new_paca; -	mtspr(SPRN_SPRG_PACA, local_paca); -#ifdef CONFIG_PPC_BOOK3E -	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); -#endif -} -  /*   * Early initialization entry point. This is called by head.S   * with MMU translation disabled. 
We rely on the "feature" of @@ -600,6 +590,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)  		return REMOTE_DISTANCE;  } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); +  void __init setup_per_cpu_areas(void)  {  	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -624,8 +617,10 @@ void __init setup_per_cpu_areas(void)  		panic("cannot initialize percpu area (err=%d)", rc);  	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; -	for_each_possible_cpu(cpu) -		paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; +	for_each_possible_cpu(cpu) { +                __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +		paca[cpu].data_offset = __per_cpu_offset[cpu]; +	}  }  #endif diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a0afb555a7c..7109f5b1baa 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -11,6 +11,7 @@  #include <linux/tracehook.h>  #include <linux/signal.h> +#include <asm/hw_breakpoint.h>  #include <asm/uaccess.h>  #include <asm/unistd.h> @@ -149,6 +150,8 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs)  	if (current->thread.dabr)  		set_dabr(current->thread.dabr);  #endif +	/* Re-enable the breakpoints for the signal stack */ +	thread_change_pc(current, regs);  	if (is32) {          	if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5c196d1086d..a61b3ddd7bb 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -288,8 +288,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)  			max_cpus = NR_CPUS;  	else  		max_cpus = 1; -  -	smp_space_timers(max_cpus);  	for_each_possible_cpu(cpu)  		if (cpu != boot_cpuid) @@ -501,14 +499,6 @@ int __devinit start_secondary(void *unused)  	current->active_mm = &init_mm;  	smp_store_cpu_info(cpu); - -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -	/* Clear any pending timer interrupts */ -	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - -	/* Enable decrementer interrupt */ -	mtspr(SPRN_TCR, TCR_DIE); -#endif  	set_dec(tb_ticks_per_jiffy);  	preempt_disable();  	cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0441bbdadbd..ccb8759c853 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -149,16 +149,6 @@ unsigned long tb_ticks_per_usec = 100; /* sane default */  EXPORT_SYMBOL(tb_ticks_per_usec);  unsigned long tb_ticks_per_sec;  EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */ -u64 tb_to_xs; -unsigned tb_to_us; - -#define TICKLEN_SCALE	NTP_SCALE_SHIFT -static u64 last_tick_len;	/* units are ns / 2^TICKLEN_SCALE */ -static u64 ticklen_to_xs;	/* 0.64 fraction */ - -/* If last_tick_len corresponds to about 1/HZ seconds, then -   last_tick_len << TICKLEN_SHIFT will be about 2^63. 
*/ -#define TICKLEN_SHIFT	(63 - 30 - TICKLEN_SCALE + SHIFT_HZ)  DEFINE_SPINLOCK(rtc_lock);  EXPORT_SYMBOL_GPL(rtc_lock); @@ -174,7 +164,6 @@ unsigned long ppc_proc_freq;  EXPORT_SYMBOL(ppc_proc_freq);  unsigned long ppc_tb_freq; -static u64 tb_last_jiffy __cacheline_aligned_in_smp;  static DEFINE_PER_CPU(u64, last_jiffy);  #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -423,30 +412,6 @@ void udelay(unsigned long usecs)  }  EXPORT_SYMBOL(udelay); -static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, -			       u64 new_tb_to_xs) -{ -	/* -	 * tb_update_count is used to allow the userspace gettimeofday code -	 * to assure itself that it sees a consistent view of the tb_to_xs and -	 * stamp_xsec variables.  It reads the tb_update_count, then reads -	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If -	 * the two values of tb_update_count match and are even then the -	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it -	 * loops back and reads them again until this criteria is met. -	 * We expect the caller to have done the first increment of -	 * vdso_data->tb_update_count already. -	 */ -	vdso_data->tb_orig_stamp = new_tb_stamp; -	vdso_data->stamp_xsec = new_stamp_xsec; -	vdso_data->tb_to_xs = new_tb_to_xs; -	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; -	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; -	vdso_data->stamp_xtime = xtime; -	smp_wmb(); -	++(vdso_data->tb_update_count); -} -  #ifdef CONFIG_SMP  unsigned long profile_pc(struct pt_regs *regs)  { @@ -470,7 +435,6 @@ EXPORT_SYMBOL(profile_pc);  static int __init iSeries_tb_recal(void)  { -	struct div_result divres;  	unsigned long titan, tb;  	/* Make sure we only run on iSeries */ @@ -501,10 +465,7 @@ static int __init iSeries_tb_recal(void)  				tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;  				tb_ticks_per_sec   = new_tb_ticks_per_sec;  				calc_cputime_factors(); -				div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); -				tb_to_xs = divres.result_low;  				vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -				vdso_data->tb_to_xs = tb_to_xs;  				setup_cputime_one_jiffy();  			}  			else { @@ -667,27 +628,9 @@ void timer_interrupt(struct pt_regs * regs)  	trace_timer_interrupt_exit(regs);  } -void wakeup_decrementer(void) -{ -	unsigned long ticks; - -	/* -	 * The timebase gets saved on sleep and restored on wakeup, -	 * so all we need to do is to reset the decrementer. -	 */ -	ticks = tb_ticks_since(__get_cpu_var(last_jiffy)); -	if (ticks < tb_ticks_per_jiffy) -		ticks = tb_ticks_per_jiffy - ticks; -	else -		ticks = 1; -	set_dec(ticks); -} -  #ifdef CONFIG_SUSPEND -void generic_suspend_disable_irqs(void) +static void generic_suspend_disable_irqs(void)  { -	preempt_disable(); -  	/* Disable the decrementer, so that it doesn't interfere  	 * with suspending.  	 
*/ @@ -697,12 +640,9 @@ void generic_suspend_disable_irqs(void)  	set_dec(0x7fffffff);  } -void generic_suspend_enable_irqs(void) +static void generic_suspend_enable_irqs(void)  { -	wakeup_decrementer(); -  	local_irq_enable(); -	preempt_enable();  }  /* Overrides the weak version in kernel/power/main.c */ @@ -722,23 +662,6 @@ void arch_suspend_enable_irqs(void)  }  #endif -#ifdef CONFIG_SMP -void __init smp_space_timers(unsigned int max_cpus) -{ -	int i; -	u64 previous_tb = per_cpu(last_jiffy, boot_cpuid); - -	/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ -	previous_tb -= tb_ticks_per_jiffy; - -	for_each_possible_cpu(i) { -		if (i == boot_cpuid) -			continue; -		per_cpu(last_jiffy, i) = previous_tb; -	} -} -#endif -  /*   * Scheduler clock - returns current time in nanosec units.   * @@ -873,10 +796,37 @@ static cycle_t timebase_read(struct clocksource *cs)  	return (cycle_t)get_tb();  } +static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, +			       u64 new_tb_to_xs, struct timespec *now, +			       u32 frac_sec) +{ +	/* +	 * tb_update_count is used to allow the userspace gettimeofday code +	 * to assure itself that it sees a consistent view of the tb_to_xs and +	 * stamp_xsec variables.  It reads the tb_update_count, then reads +	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If +	 * the two values of tb_update_count match and are even then the +	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it +	 * loops back and reads them again until this criteria is met. +	 * We expect the caller to have done the first increment of +	 * vdso_data->tb_update_count already. +	 */ +	vdso_data->tb_orig_stamp = new_tb_stamp; +	vdso_data->stamp_xsec = new_stamp_xsec; +	vdso_data->tb_to_xs = new_tb_to_xs; +	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; +	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; +	vdso_data->stamp_xtime = *now; +	vdso_data->stamp_sec_fraction = frac_sec; +	smp_wmb(); +	++(vdso_data->tb_update_count); +} +  void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,  		     u32 mult)  {  	u64 t2x, stamp_xsec; +	u32 frac_sec;  	if (clock != &clocksource_timebase)  		return; @@ -888,10 +838,14 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,  	/* XXX this assumes clock->shift == 22 */  	/* 4611686018 ~= 2^(20+64-22) / 1e9 */  	t2x = (u64) mult * 4611686018ULL; -	stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; +	stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;  	do_div(stamp_xsec, 1000000000); -	stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; -	update_gtod(clock->cycle_last, stamp_xsec, t2x); +	stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; + +	BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); +	/* this is tv_nsec / 1e9 as a 0.32 fraction */ +	frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; +	update_gtod(clock->cycle_last, stamp_xsec, t2x, wall_time, frac_sec);  }  void update_vsyscall_tz(void) @@ -1007,15 +961,13 @@ void secondary_cpu_time_init(void)  /* This function is only called on the boot processor */  void __init time_init(void)  { -	unsigned long flags;  	struct div_result res; -	u64 scale, x; +	u64 scale;  	unsigned shift;  	if (__USE_RTC()) {  		/* 601 processor: dec counts down by 128 every 128ns */  		ppc_tb_freq = 1000000000; -		tb_last_jiffy = get_rtcl();  	} else {  		/* Normal PowerPC with timebase register */  		ppc_md.calibrate_decr(); @@ -1023,50 +975,15 @@ void __init time_init(void)  		       ppc_tb_freq / 
1000000, ppc_tb_freq % 1000000);  		printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",  		       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); -		tb_last_jiffy = get_tb();  	}  	tb_ticks_per_jiffy = ppc_tb_freq / HZ;  	tb_ticks_per_sec = ppc_tb_freq;  	tb_ticks_per_usec = ppc_tb_freq / 1000000; -	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);  	calc_cputime_factors();  	setup_cputime_one_jiffy();  	/* -	 * Calculate the length of each tick in ns.  It will not be -	 * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ. -	 * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq, -	 * rounded up. -	 */ -	x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1; -	do_div(x, ppc_tb_freq); -	tick_nsec = x; -	last_tick_len = x << TICKLEN_SCALE; - -	/* -	 * Compute ticklen_to_xs, which is a factor which gets multiplied -	 * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value. -	 * It is computed as: -	 * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9) -	 * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT -	 * which turns out to be N = 51 - SHIFT_HZ. -	 * This gives the result as a 0.64 fixed-point fraction. -	 * That value is reduced by an offset amounting to 1 xsec per -	 * 2^31 timebase ticks to avoid problems with time going backwards -	 * by 1 xsec when we do timer_recalc_offset due to losing the -	 * fractional xsec.  That offset is equal to ppc_tb_freq/2^51 -	 * since there are 2^20 xsec in a second. -	 */ -	div128_by_32((1ULL << 51) - ppc_tb_freq, 0, -		     tb_ticks_per_jiffy << SHIFT_HZ, &res); -	div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res); -	ticklen_to_xs = res.result_low; - -	/* Compute tb_to_xs from tick_nsec */ -	tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs); - -	/*  	 * Compute scale factor for sched_clock.  	 * The calibrate_decr() function has set tb_ticks_per_sec,  	 * which is the timebase frequency. @@ -1087,21 +1004,14 @@ void __init time_init(void)  	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */  	boot_tb = get_tb_or_rtc(); -	write_seqlock_irqsave(&xtime_lock, flags); -  	/* If platform provided a timezone (pmac), we correct the time */          if (timezone_offset) {  		sys_tz.tz_minuteswest = -timezone_offset / 60;  		sys_tz.tz_dsttime = 0;          } -	vdso_data->tb_orig_stamp = tb_last_jiffy;  	vdso_data->tb_update_count = 0;  	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -	vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; -	vdso_data->tb_to_xs = tb_to_xs; - -	write_sequnlock_irqrestore(&xtime_lock, flags);  	/* Start the decrementer on CPUs that have manual control  	 * such as BookE @@ -1195,39 +1105,6 @@ void to_tm(int tim, struct rtc_time * tm)  	GregorianDay(tm);  } -/* Auxiliary function to compute scaling factors */ -/* Actually the choice of a timebase running at 1/4 the of the bus - * frequency giving resolution of a few tens of nanoseconds is quite nice. - * It makes this computation very precise (27-28 bits typically) which - * is optimistic considering the stability of most processor clock - * oscillators and the precision with which the timebase frequency - * is measured but does not harm. - */ -unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) -{ -        unsigned mlt=0, tmp, err; -        /* No concern for performance, it's done once: use a stupid -         * but safe and compact method to find the multiplier. 
-         */ -   -        for (tmp = 1U<<31; tmp != 0; tmp >>= 1) { -                if (mulhwu(inscale, mlt|tmp) < outscale) -			mlt |= tmp; -        } -   -        /* We might still be off by 1 for the best approximation. -         * A side effect of this is that if outscale is too large -         * the returned value will be zero. -         * Many corner cases have been checked and seem to work, -         * some might have been forgotten in the test however. -         */ -   -        err = inscale * (mlt+1); -        if (err <= inscale/2) -		mlt++; -        return mlt; -} -  /*   * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit   * result. diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 25fc33984c2..a45a63c3a0c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -55,9 +55,6 @@  #endif  #include <asm/kexec.h>  #include <asm/ppc-opcode.h> -#ifdef CONFIG_FSL_BOOKE -#include <asm/dbell.h> -#endif  #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)  int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -688,7 +685,7 @@ void RunModeException(struct pt_regs *regs)  void __kprobes single_step_exception(struct pt_regs *regs)  { -	regs->msr &= ~(MSR_SE | MSR_BE);  /* Turn off 'trace' bits */ +	clear_single_step(regs);  	if (notify_die(DIE_SSTEP, "single_step", regs, 5,  					5, SIGTRAP) == NOTIFY_STOP) @@ -707,10 +704,8 @@ void __kprobes single_step_exception(struct pt_regs *regs)   */  static void emulate_single_step(struct pt_regs *regs)  { -	if (single_stepping(regs)) { -		clear_single_step(regs); -		_exception(SIGTRAP, regs, TRAP_TRACE, 0); -	} +	if (single_stepping(regs)) +		single_step_exception(regs);  }  static inline int __parse_fpscr(unsigned long fpscr) @@ -1344,24 +1339,6 @@ void vsx_assist_exception(struct pt_regs *regs)  #endif /* CONFIG_VSX */  #ifdef CONFIG_FSL_BOOKE - -void doorbell_exception(struct pt_regs *regs) -{ -#ifdef CONFIG_SMP -	int cpu = smp_processor_id(); -	int msg; - -	if (num_online_cpus() < 2) -		return; - -	for (msg = 0; msg < 4; msg++) -		if (test_and_clear_bit(msg, &dbell_smp_message[cpu])) -			smp_message_recv(msg); -#else -	printk(KERN_WARNING "Received doorbell on non-smp system\n"); -#endif -} -  void CacheLockingException(struct pt_regs *regs, unsigned long address,  			   unsigned long error_code)  { diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index ee038d4bf25..4ee09ee2e83 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -19,8 +19,10 @@  /* Offset for the low 32-bit part of a field of long type */  #ifdef CONFIG_PPC64  #define LOPART	4 +#define TSPEC_TV_SEC	TSPC64_TV_SEC+LOPART  #else  #define LOPART	0 +#define TSPEC_TV_SEC	TSPC32_TV_SEC  #endif  	.text @@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	mr	r9, r3			/* datapage ptr in r9 */  	cmplwi	r10,0			/* check if tv is NULL */  	beq	3f -	bl	__do_get_xsec@local	/* get xsec from tb & kernel */ -	bne-	2f			/* out of line -> do syscall */ - -	/* seconds are xsec >> 20 */ -	rlwinm	r5,r4,12,20,31 -	rlwimi	r5,r3,12,0,19 -	stw	r5,TVAL32_TV_SEC(r10) - -	/* get remaining xsec and convert to usec. 
we scale -	 * up remaining xsec by 12 bits and get the top 32 bits -	 * of the multiplication -	 */ -	rlwinm	r5,r4,12,0,19 -	lis	r6,1000000@h -	ori	r6,r6,1000000@l -	mulhwu	r5,r5,r6 -	stw	r5,TVAL32_TV_USEC(r10) +	lis	r7,1000000@ha		/* load up USEC_PER_SEC */ +	addi	r7,r7,1000000@l		/* so we get microseconds in r4 */ +	bl	__do_get_tspec@local	/* get sec/usec from tb & kernel */ +	stw	r3,TVAL32_TV_SEC(r10) +	stw	r4,TVAL32_TV_USEC(r10)  3:	cmplwi	r11,0			/* check if tz is NULL */  	beq	1f @@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	crclr	cr0*4+so  	li	r3,0  	blr - -2: -	mtlr	r12 -	mr	r3,r10 -	mr	r4,r11 -	li	r0,__NR_gettimeofday -	sc -	blr    .cfi_endproc  V_FUNCTION_END(__kernel_gettimeofday) @@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)  	mr	r11,r4			/* r11 saves tp */  	bl	__get_datapage@local	/* get data page */  	mr	r9,r3			/* datapage ptr in r9 */ - +	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */ +	ori	r7,r7,NSEC_PER_SEC@l  50:	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */  	bne	cr1,80f			/* not monotonic -> all done */ @@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres)  /* - * This is the core of gettimeofday() & friends, it returns the xsec - * value in r3 & r4 and expects the datapage ptr (non clobbered) - * in r9. clobbers r0,r4,r5,r6,r7,r8. - * When returning, r8 contains the counter value that can be reused - * by the monotonic clock implementation - */ -__do_get_xsec: -  .cfi_startproc -	/* Check for update count & load values. We use the low -	 * order 32 bits of the update count -	 */ -1:	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -	andi.	r0,r8,1			/* pending update ? loop */ -	bne-	1b -	xor	r0,r8,r8		/* create dependency */ -	add	r9,r9,r0 - -	/* Load orig stamp (offset to TB) */ -	lwz	r5,CFG_TB_ORIG_STAMP(r9) -	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9) - -	/* Get a stable TB value */ -2:	mftbu	r3 -	mftbl	r4 -	mftbu	r0 -	cmpl	cr0,r3,r0 -	bne-	2b - -	/* Substract tb orig stamp. If the high part is non-zero, we jump to -	 * the slow path which call the syscall. -	 * If it's ok, then we have our 32 bits tb_ticks value in r7 -	 */ -	subfc	r7,r6,r4 -	subfe.	r0,r5,r3 -	bne-	3f - -	/* Load scale factor & do multiplication */ -	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */ -	lwz	r6,(CFG_TB_TO_XS+4)(r9) -	mulhwu	r4,r7,r5 -	mulhwu	r6,r7,r6 -	mullw	r0,r7,r5 -	addc	r6,r6,r0 - -	/* At this point, we have the scaled xsec value in r4 + XER:CA -	 * we load & add the stamp since epoch -	 */ -	lwz	r5,CFG_STAMP_XSEC(r9) -	lwz	r6,(CFG_STAMP_XSEC+4)(r9) -	adde	r4,r4,r6 -	addze	r3,r5 - -	/* We now have our result in r3,r4. We create a fake dependency -	 * on that result and re-check the counter -	 */ -	or	r6,r4,r3 -	xor	r0,r6,r6 -	add	r9,r9,r0 -	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -        cmpl    cr0,r8,r0		/* check if updated */ -	bne-	1b - -	/* Warning ! The caller expects CR:EQ to be set to indicate a -	 * successful calculation (so it won't fallback to the syscall -	 * method). We have overriden that CR bit in the counter check, -	 * but fortunately, the loop exit condition _is_ CR:EQ set, so -	 * we can exit safely here. If you change this code, be careful -	 * of that side effect. -	 */ -3:	blr -  .cfi_endproc - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r3 and r4. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r3 (seconds) and r4. 
+ * On entry, r7 gives the resolution of r4, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.   * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r5 and r6.   * On return, r8 contains the counter value that can be reused.   * This clobbers cr0 but not any other cr field.   */ @@ -297,70 +209,58 @@ __do_get_tspec:  2:	mftbu	r3  	mftbl	r4  	mftbu	r0 -	cmpl	cr0,r3,r0 +	cmplw	cr0,r3,r0  	bne-	2b  	/* Subtract tb orig stamp and shift left 12 bits.  	 */ -	subfc	r7,r6,r4 +	subfc	r4,r6,r4  	subfe	r0,r5,r3  	slwi	r0,r0,12 -	rlwimi.	r0,r7,12,20,31 -	slwi	r7,r7,12 +	rlwimi.	r0,r4,12,20,31 +	slwi	r4,r4,12 -	/* Load scale factor & do multiplication */ +	/* +	 * Load scale factor & do multiplication. +	 * We only use the high 32 bits of the tb_to_xs value. +	 * Even with a 1GHz timebase clock, the high 32 bits of +	 * tb_to_xs will be at least 4 million, so the error from +	 * ignoring the low 32 bits will be no more than 0.25ppm. +	 * The error will just make the clock run very very slightly +	 * slow until the next time the kernel updates the VDSO data, +	 * at which point the clock will catch up to the kernel's value, +	 * so there is no long-term error accumulation. +	 */  	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */ -	lwz	r6,(CFG_TB_TO_XS+4)(r9) -	mulhwu	r3,r7,r6 -	mullw	r10,r7,r5 -	mulhwu	r4,r7,r5 -	addc	r10,r3,r10 +	mulhwu	r4,r4,r5  	li	r3,0  	beq+	4f			/* skip high part computation if 0 */  	mulhwu	r3,r0,r5 -	mullw	r7,r0,r5 -	mulhwu	r5,r0,r6 -	mullw	r6,r0,r6 -	adde	r4,r4,r7 -	addze	r3,r3 +	mullw	r5,r0,r5  	addc	r4,r4,r5  	addze	r3,r3 -	addc	r10,r10,r6 - -4:	addze	r4,r4			/* add in carry */ -	lis	r7,NSEC_PER_SEC@h -	ori	r7,r7,NSEC_PER_SEC@l -	mulhwu	r4,r4,r7		/* convert to nanoseconds */ - -	/* At this point, we have seconds & nanoseconds since the xtime -	 * stamp in r3+CA and r4.  Load & add the xtime stamp. +4: +	/* At this point, we have seconds since the xtime stamp +	 * as a 32.32 fixed-point number in r3 and r4. +	 * Load & add the xtime stamp.  	 */ -#ifdef CONFIG_PPC64 -	lwz	r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9) -	lwz	r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9) -#else -	lwz	r5,STAMP_XTIME+TSPC32_TV_SEC(r9) -	lwz	r6,STAMP_XTIME+TSPC32_TV_NSEC(r9) -#endif -	add	r4,r4,r6 +	lwz	r5,STAMP_XTIME+TSPEC_TV_SEC(r9) +	lwz	r6,STAMP_SEC_FRAC(r9) +	addc	r4,r4,r6  	adde	r3,r3,r5 -	/* We now have our result in r3,r4. 
We create a fake dependency -	 * on that result and re-check the counter +	/* We create a fake dependency on the result in r3/r4 +	 * and re-check the counter  	 */  	or	r6,r4,r3  	xor	r0,r6,r6  	add	r9,r9,r0  	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -        cmpl    cr0,r8,r0		/* check if updated */ +        cmplw	cr0,r8,r0		/* check if updated */  	bne-	1b -	/* check for nanosecond overflow and adjust if necessary */ -	cmpw	r4,r7 -	bltlr				/* all done if no overflow */ -	subf	r4,r7,r4		/* adjust if overflow */ -	addi	r3,r3,1 +	mulhwu	r4,r4,r7		/* convert to micro or nanoseconds */  	blr    .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 262cd5857a5..e97a9a0dc4a 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -33,18 +33,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */  	cmpldi	r11,0			/* check if tv is NULL */  	beq	2f -	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */ -	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */ -	ori     r7,r7,16960 -	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */ -	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */ -	std	r5,TVAL64_TV_SEC(r11)	/* store sec in tv */ -	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */ -	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) / -					 * XSEC_PER_SEC -					 */ -	rldicl  r0,r0,44,20 -	std	r0,TVAL64_TV_USEC(r11)	/* store usec in tv */ +	lis	r7,1000000@ha		/* load up USEC_PER_SEC */ +	addi	r7,r7,1000000@l +	bl	V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ +	std	r4,TVAL64_TV_SEC(r11)	/* store sec in tv */ +	std	r5,TVAL64_TV_USEC(r11)	/* store usec in tv */  2:	cmpldi	r10,0			/* check if tz is NULL */  	beq	1f  	lwz	r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ @@ -77,6 +70,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)    .cfi_register lr,r12  	mr	r11,r4			/* r11 saves tp */  	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */ +	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */ +	ori	r7,r7,NSEC_PER_SEC@l  50:	bl	V_LOCAL_FUNC(__do_get_tspec)	/* get time from tb & kernel */  	bne	cr1,80f			/* if not monotonic, all done */ @@ -171,49 +166,12 @@ V_FUNCTION_END(__kernel_clock_getres)  /* - * This is the core of gettimeofday(), it returns the xsec - * value in r4 and expects the datapage ptr (non clobbered) - * in r3. clobbers r0,r4,r5,r6,r7,r8 - * When returning, r8 contains the counter value that can be reused - */ -V_FUNCTION_BEGIN(__do_get_xsec) -  .cfi_startproc -	/* check for update count & load values */ -1:	ld	r8,CFG_TB_UPDATE_COUNT(r3) -	andi.	r0,r8,1			/* pending update ? loop */ -	bne-	1b -	xor	r0,r8,r8		/* create dependency */ -	add	r3,r3,r0 - -	/* Get TB & offset it. We use the MFTB macro which will generate -	 * workaround code for Cell. -	 */ -	MFTB(r7) -	ld	r9,CFG_TB_ORIG_STAMP(r3) -	subf	r7,r9,r7 - -	/* Scale result */ -	ld	r5,CFG_TB_TO_XS(r3) -	mulhdu	r7,r7,r5 - -	/* Add stamp since epoch */ -	ld	r6,CFG_STAMP_XSEC(r3) -	add	r4,r6,r7 - -	xor	r0,r4,r4 -	add	r3,r3,r0 -	ld	r0,CFG_TB_UPDATE_COUNT(r3) -        cmpld   cr0,r0,r8		/* check if updated */ -	bne-	1b -	blr -  .cfi_endproc -V_FUNCTION_END(__do_get_xsec) - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r4 and r5. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r4 (seconds) and r5. 
+ * On entry, r7 gives the resolution of r5, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.   * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r6 and r9.   * On return, r8 contains the counter value that can be reused.   * This clobbers cr0 but not any other cr field.   */ @@ -229,18 +187,18 @@ V_FUNCTION_BEGIN(__do_get_tspec)  	/* Get TB & offset it. We use the MFTB macro which will generate  	 * workaround code for Cell.  	 */ -	MFTB(r7) +	MFTB(r6)  	ld	r9,CFG_TB_ORIG_STAMP(r3) -	subf	r7,r9,r7 +	subf	r6,r9,r6  	/* Scale result */  	ld	r5,CFG_TB_TO_XS(r3) -	sldi	r7,r7,12		/* compute time since stamp_xtime */ -	mulhdu	r6,r7,r5		/* in units of 2^-32 seconds */ +	sldi	r6,r6,12		/* compute time since stamp_xtime */ +	mulhdu	r6,r6,r5		/* in units of 2^-32 seconds */  	/* Add stamp since epoch */  	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3) -	ld	r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) +	lwz	r5,STAMP_SEC_FRAC(r3)  	or	r0,r4,r5  	or	r0,r0,r6  	xor	r0,r0,r0 @@ -250,17 +208,11 @@ V_FUNCTION_BEGIN(__do_get_tspec)  	bne-	1b			/* reload if so */  	/* convert to seconds & nanoseconds and add to stamp */ -	lis	r7,NSEC_PER_SEC@h -	ori	r7,r7,NSEC_PER_SEC@l -	mulhwu	r0,r6,r7		/* compute nanoseconds and */ +	add	r6,r6,r5		/* add on fractional seconds of xtime */ +	mulhwu	r5,r6,r7		/* compute micro or nanoseconds and */  	srdi	r6,r6,32		/* seconds since stamp_xtime */ -	clrldi	r0,r0,32 -	add	r5,r5,r0		/* add nanoseconds together */ -	cmpd	r5,r7			/* overflow? */ +	clrldi	r5,r5,32  	add	r4,r4,r6 -	bltlr				/* all done if no overflow */ -	subf	r5,r7,r5		/* if overflow, adjust */ -	addi	r4,r4,1  	blr    .cfi_endproc  V_FUNCTION_END(__do_get_tspec)  |
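
For reference, the two magic constants introduced in update_vsyscall() above both come from dividing a power of two by NSEC_PER_SEC: 4611686018 ~= 2^(20+64-22)/1e9 is the tb_to_xs scale for clock->shift == 22, and 18446744073 ~= 2^64/1e9 turns tv_nsec into the 0.32 binary fraction stored in stamp_sec_fraction. A minimal user-space C sketch (illustrative only, not kernel code) reproducing them:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* tb_to_xs scale used when clock->shift == 22: 2^(20+64-22) / 1e9 */
		uint64_t t2x_const  = (1ULL << 62) / 1000000000ULL;
		/* turns tv_nsec into a 0.32 binary fraction of a second: 2^64 / 1e9 */
		uint64_t frac_const = 0xffffffffffffffffULL / 1000000000ULL;

		printf("%llu %llu\n", (unsigned long long)t2x_const,
		       (unsigned long long)frac_const);	/* 4611686018 18446744073 */

		/* e.g. tv_nsec = 250000000 (a quarter of a second) */
		uint32_t frac_sec = (uint32_t)(((uint64_t)250000000 * 18446744073ULL) >> 32);
		printf("0x%08x\n", frac_sec);		/* ~0x40000000, i.e. 1/4 in 0.32 */
		return 0;
	}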
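
The rewritten __do_get_tspec routines in both VDSOs follow the read protocol described in the update_gtod() comment: spin while tb_update_count is odd, read the stamps and the timebase, then retry if the count changed. A rough C sketch of that logic, assuming a 64-bit GCC/Clang with __int128 (the vdso_snapshot layout, read_timebase() helper and reliance on volatile are illustrative stand-ins; the real code uses mftb and fake data dependencies rather than C semantics):

	#include <stdint.h>

	extern uint64_t read_timebase(void);	/* illustrative stand-in for mftb */

	struct vdso_snapshot {			/* illustrative, not the real vdso_data layout */
		uint32_t tb_update_count;
		uint64_t tb_orig_stamp;
		uint64_t tb_to_xs;		/* ~2^84 / timebase frequency */
		uint64_t stamp_sec;		/* stamp_xtime.tv_sec */
		uint32_t stamp_sec_fraction;	/* 0.32 binary fraction of a second */
	};

	/* res is USEC_PER_SEC or NSEC_PER_SEC, playing the role of r7 above */
	static void get_tspec(const volatile struct vdso_snapshot *d, uint32_t res,
			      uint64_t *sec, uint32_t *sub_sec)
	{
		uint32_t count;
		uint64_t delta, t;

		do {
			while ((count = d->tb_update_count) & 1)
				;		/* an update is in progress, spin */

			delta = read_timebase() - d->tb_orig_stamp;
			/* (delta << 12) * tb_to_xs / 2^64 = seconds since the stamp, as 32.32 */
			t = (uint64_t)(((unsigned __int128)(delta << 12) * d->tb_to_xs) >> 64);
			t += d->stamp_sec_fraction;	/* any carry propagates into the seconds */
			*sec = d->stamp_sec + (t >> 32);
			/* scale the 0.32 fraction up to micro- or nanoseconds */
			*sub_sec = (uint32_t)(((uint64_t)(uint32_t)t * res) >> 32);
		} while (count != d->tb_update_count);	/* retry if the data changed under us */
	}

gettimeofday() calls this with res = USEC_PER_SEC and clock_gettime() with res = NSEC_PER_SEC, which is why the assembly no longer needs the old nanosecond-overflow adjustment: the fraction never reaches a full second by construction.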
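
The 0.25ppm bound quoted in the 32-bit __do_get_tspec comment can be checked numerically: tb_to_xs is roughly 2^84 divided by the timebase frequency, so its high 32 bits are 2^52/freq, and dropping the low word costs at most one part in that value. A small sketch, assuming a 1 GHz timebase as the worst case:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t freq = 1000000000ULL;		/* 1 GHz timebase, worst case */
		uint64_t hi = (1ULL << 52) / freq;	/* high 32 bits of ~2^84/freq */

		printf("high word = %llu, max error = %.3f ppm\n",
		       (unsigned long long)hi, 1e6 / (double)hi);
		/* prints roughly: high word = 4503599, max error = 0.222 ppm */
		return 0;
	}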