| field | value | |
|---|---|---|
| author | Ingo Molnar <mingo@kernel.org> | 2012-07-31 17:05:27 +0200 |
| committer | Ingo Molnar <mingo@kernel.org> | 2012-07-31 17:05:27 +0200 |
| commit | 1f815faec46e83bc96039797151846b60875bb06 (patch) | |
| tree | 41a874f8090e3f69c8a9f13e3c85dd7a371e569b | |
| parent | b44d50dcacea0d485ca2ff9140f8cc28ee22f28d (diff) | |
| parent | e2b34e311be3a57c9abcb927e37a57e38913714c (diff) | |
Merge branch 'linus' into timers/urgent
Merge in Linus's branch which already has timers/core merged.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
396 files changed, 9302 insertions, 4783 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 5c8d7496809..fc103d7a047 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome!
 		when publicizing a pointer to a structure that can
 		be traversed by an RCU read-side critical section.

-5.	If call_rcu(), or a related primitive such as call_rcu_bh() or
-	call_rcu_sched(), is used, the callback function must be
-	written to be called from softirq context.  In particular,
+5.	If call_rcu(), or a related primitive such as call_rcu_bh(),
+	call_rcu_sched(), or call_srcu() is used, the callback function
+	must be written to be called from softirq context.  In particular,
 	it cannot block.

 6.	Since synchronize_rcu() can block, it cannot be called from
@@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome!
 	updater uses call_rcu_sched() or synchronize_sched(), then
 	the corresponding readers must disable preemption, possibly
 	by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
-	If the updater uses synchronize_srcu(), the the corresponding
-	readers must use srcu_read_lock() and srcu_read_unlock(),
-	and with the same srcu_struct.	The rules for the expedited
-	primitives are the same as for their non-expedited counterparts.
-	Mixing things up will result in confusion and broken kernels.
+	If the updater uses synchronize_srcu() or call_srcu(),
+	the the corresponding readers must use srcu_read_lock() and
+	srcu_read_unlock(), and with the same srcu_struct.  The rules for
+	the expedited primitives are the same as for their non-expedited
+	counterparts.  Mixing things up will result in confusion and
+	broken kernels.

 	One exception to this rule: rcu_read_lock() and rcu_read_unlock()
 	may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome!
 	victim CPU from ever going offline.)

 14.	SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
-	synchronize_srcu(), and synchronize_srcu_expedited()) may only
-	be invoked from process context.  Unlike other forms of RCU, it
-	-is- permissible to block in an SRCU read-side critical section
-	(demarked by srcu_read_lock() and srcu_read_unlock()), hence the
-	"SRCU": "sleepable RCU".  Please note that if you don't need
-	to sleep in read-side critical sections, you should be using
-	RCU rather than SRCU, because RCU is almost always faster and
-	easier to use than is SRCU.
+	synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
+	may only be invoked from process context.  Unlike other forms of
+	RCU, it -is- permissible to block in an SRCU read-side critical
+	section (demarked by srcu_read_lock() and srcu_read_unlock()),
+	hence the "SRCU": "sleepable RCU".  Please note that if you
+	don't need to sleep in read-side critical sections, you should be
+	using RCU rather than SRCU, because RCU is almost always faster
+	and easier to use than is SRCU.

 	If you need to enter your read-side critical section in a
 	hardirq or exception handler, and then exit that same read-side
@@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome!
 	cleanup_srcu_struct().	These are passed a "struct srcu_struct"
 	that defines the scope of a given SRCU domain.	Once initialized,
 	the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
-	synchronize_srcu(), and synchronize_srcu_expedited().  A given
-	synchronize_srcu() waits only for SRCU read-side critical
+	synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
+	A given synchronize_srcu() waits only for SRCU read-side critical
 	sections governed by srcu_read_lock() and srcu_read_unlock()
 	calls that have been passed the same srcu_struct.  This property
 	is what makes sleeping read-side critical sections tolerable --
@@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome!
 	requiring SRCU's read-side deadlock immunity or low read-side
 	realtime latency.

-	Note that, rcu_assign_pointer() relates to SRCU just as they do
+	Note that, rcu_assign_pointer() relates to SRCU just as it does
 	to other forms of RCU.

 15.	The whole point of call_rcu(), synchronize_rcu(), and friends
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index e439a0edee2..38428c12513 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows:
    2. Execute rcu_barrier().
    3. Allow the module to be unloaded.

-Quick Quiz #1: Why is there no srcu_barrier()?
-
 The rcutorture module makes use of rcu_barrier in its exit function
 as follows:
@@ -162,7 +160,7 @@ for any pre-existing callbacks to complete.
 Then lines 55-62 print status and do operation-specific cleanup, and
 then return, permitting the module-unload operation to be completed.

-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
 	be required?

 Your module might have additional complications. For example, if your
@@ -242,7 +240,7 @@ reaches zero, as follows:
  4 complete(&rcu_barrier_completion);
  5 }

-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
 	immediately (thus incrementing rcu_barrier_cpu_count to the
 	value one), but the other CPU's rcu_barrier_func() invocations
 	are delayed for a full grace period? Couldn't this result in
@@ -259,12 +257,7 @@ so that your module may be safely unloaded.

 Answers to Quick Quizzes

-Quick Quiz #1: Why is there no srcu_barrier()?
-
-Answer: Since there is no call_srcu(), there can be no outstanding SRCU
-	callbacks. Therefore, there is no need to wait for them.
-
-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
 	be required?

 Answer: Interestingly enough, rcu_barrier() was not originally
@@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally
 	implementing rcutorture, and found that rcu_barrier() solves
 	this problem as well.

-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
 	immediately (thus incrementing rcu_barrier_cpu_count to the
 	value one), but the other CPU's rcu_barrier_func() invocations
 	are delayed for a full grace period? Couldn't this result in
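The two RCU documentation diffs above revolve around the addition of call_srcu() and srcu_barrier(). For orientation, here is a minimal, hypothetical sketch of how the pieces are meant to fit together in a module; the demo_* names are invented, only the SRCU API calls themselves are the kernel's:

```c
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct demo_node {
	int value;
	struct rcu_head rh;
};

static struct srcu_struct demo_srcu;

/* SRCU callbacks run from softirq context, so they must not block. */
static void demo_reclaim(struct rcu_head *rh)
{
	kfree(container_of(rh, struct demo_node, rh));
}

/* Writer side: detach a node, then defer its kfree() past all readers. */
static void demo_retire(struct demo_node *p)
{
	call_srcu(&demo_srcu, &p->rh, demo_reclaim);
}

/* Reader side: srcu_read_lock()/unlock() on the *same* srcu_struct. */
static int demo_read(struct demo_node *p)
{
	int idx, v;

	idx = srcu_read_lock(&demo_srcu);
	v = p->value;		/* may block here: this is sleepable RCU */
	srcu_read_unlock(&demo_srcu, idx);
	return v;
}

static int __init demo_init(void)
{
	return init_srcu_struct(&demo_srcu);
}

static void __exit demo_exit(void)
{
	/* Wait for queued call_srcu() callbacks, then tear down. */
	srcu_barrier(&demo_srcu);
	cleanup_srcu_struct(&demo_srcu);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```

The srcu_barrier() call in the exit path is exactly the rule rcubarrier.txt states for call_rcu(): flush in-flight callbacks before the code they point at can be unloaded. Before call_srcu() existed there was nothing to flush, which is why the old Quick Quiz #1 could be deleted.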
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 4ddf3913fd8..7dce8a17eac 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -174,11 +174,20 @@ torture_type	The type of RCU to test, with string values as follows:
 			and synchronize_rcu_bh_expedited().

 		"srcu": srcu_read_lock(), srcu_read_unlock() and
+			call_srcu().
+
+		"srcu_sync": srcu_read_lock(), srcu_read_unlock() and
 			synchronize_srcu().

 		"srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
 			synchronize_srcu_expedited().

+		"srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+			and call_srcu().
+
+		"srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+			and synchronize_srcu().
+
 		"sched": preempt_disable(), preempt_enable(), and
 			call_rcu_sched().
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3d..69ee188515e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -833,9 +833,9 @@ sched:	Critical sections	Grace period		Barrier

 SRCU:	Critical sections	Grace period		Barrier

-	srcu_read_lock		synchronize_srcu	N/A
-	srcu_read_unlock	synchronize_srcu_expedited
-	srcu_read_lock_raw
+	srcu_read_lock		synchronize_srcu	srcu_barrier
+	srcu_read_unlock	call_srcu
+	srcu_read_lock_raw	synchronize_srcu_expedited
 	srcu_read_unlock_raw
 	srcu_dereference
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
index 82b43f91585..a4119f6422d 100644
--- a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
@@ -1626,3 +1626,5 @@ MX6Q_PAD_SD2_DAT3__PCIE_CTRL_MUX_11		1587
 MX6Q_PAD_SD2_DAT3__GPIO_1_12			1588
 MX6Q_PAD_SD2_DAT3__SJC_DONE			1589
 MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3		1590
+MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID		1591
+MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID		1592
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 506c7390c2b..13f1aa09b93 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -86,7 +86,7 @@ There is also a gitweb interface available at
 http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git

 More information about kexec-tools can be found at
-http://www.kernel.org/pub/linux/utils/kernel/kexec/README.html
+http://horms.net/projects/kexec/

 3) Unpack the tarball with the tar command, as follows:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373..12783fa833c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to process
 			in one batch.

+	rcutree.fanout_leaf=	[KNL,BOOT]
+			Increase the number of CPUs assigned to each
+			leaf rcu_node structure.  Useful for very large
+			systems.
+
 	rcutree.qhimark=	[KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
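The checklist.txt hunk earlier restates the core pairing rule: readers and updaters must use the same RCU flavor. A hedged illustration of a correctly paired sched-flavor reader and updater (all identifiers here are invented; it assumes active_cfg was installed before the first replacement):

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg {
	int threshold;
	struct rcu_head rh;
};

static struct cfg __rcu *active_cfg;

/* Reader: must match the updater's flavor -- here, sched-RCU. */
static int cfg_threshold(void)
{
	int t;

	rcu_read_lock_sched();
	t = rcu_dereference_sched(active_cfg)->threshold;
	rcu_read_unlock_sched();
	return t;
}

static void cfg_free(struct rcu_head *rh)
{
	kfree(container_of(rh, struct cfg, rh));
}

/* Updater: call_rcu_sched() pairs with the sched-flavor readers above. */
static void cfg_replace(struct cfg *newc)
{
	struct cfg *old = rcu_dereference_protected(active_cfg, 1);

	rcu_assign_pointer(active_cfg, newc);
	call_rcu_sched(&old->rh, cfg_free);
}
```

Swapping in rcu_read_lock() on the reader side while keeping call_rcu_sched() on the update side is precisely the "mixing things up" the checklist warns will break kernels.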
diff --git a/MAINTAINERS b/MAINTAINERS
index 1b71f6ceae0..fe643e7b9df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3433,13 +3433,14 @@ S:	Supported
 F:	drivers/idle/i7300_idle.c

 IEEE 802.15.4 SUBSYSTEM
+M:	Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
 M:	Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
-M:	Sergey Lapin <slapin@ossfans.org>
 L:	linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://apps.sourceforge.net/trac/linux-zigbee
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git
 S:	Maintained
 F:	net/ieee802154/
+F:	net/mac802154/
 F:	drivers/ieee802154/

 IIO SUBSYSTEM AND DRIVERS
@@ -5564,7 +5565,7 @@ F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*

 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:	Anirban Chakraborty <anirban.chakraborty@qlogic.com>
+M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
 M:	Sony Chacko <sony.chacko@qlogic.com>
 M:	linux-driver@qlogic.com
 L:	netdev@vger.kernel.org
@@ -5572,7 +5573,6 @@ S:	Supported
 F:	drivers/net/ethernet/qlogic/qlcnic/

 QLOGIC QLGE 10Gb ETHERNET DRIVER
-M:	Anirban Chakraborty <anirban.chakraborty@qlogic.com>
 M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
 M:	Ron Mercer <ron.mercer@qlogic.com>
 M:	linux-driver@qlogic.com
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Saber-toothed Squirrel

 # *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 10dcec7e732..f7b84aced65 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -43,8 +43,8 @@
 	pmu {
 		compatible = "arm,cortex-a9-pmu";
-		interrupts = <0 8 0x04
-			      0 9 0x04>;
+		interrupts = <0 6 0x04
+			      0 7 0x04>;
 	};

 	L2: l2-cache {
@@ -119,8 +119,8 @@
 		gmac0: eth@e2000000 {
 			compatible = "st,spear600-gmac";
 			reg = <0xe2000000 0x8000>;
-			interrupts = <0 23 0x4
-				      0 24 0x4>;
+			interrupts = <0 33 0x4
+				      0 34 0x4>;
 			interrupt-names = "macirq", "eth_wake_irq";
 			status = "disabled";
 		};
@@ -202,6 +202,7 @@
 			kbd@e0300000 {
 				compatible = "st,spear300-kbd";
 				reg = <0xe0300000 0x1000>;
+				interrupts = <0 52 0x4>;
 				status = "disabled";
 			};

@@ -224,7 +225,7 @@
 			serial@e0000000 {
 				compatible = "arm,pl011", "arm,primecell";
 				reg = <0xe0000000 0x1000>;
-				interrupts = <0 36 0x4>;
+				interrupts = <0 35 0x4>;
 				status = "disabled";
 			};
diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index c13fd1f3b09..e4e912f9502 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -15,8 +15,8 @@
 /include/ "spear320.dtsi"

 / {
-	model = "ST SPEAr300 Evaluation Board";
-	compatible = "st,spear300-evb", "st,spear300";
+	model = "ST SPEAr320 Evaluation Board";
+	compatible = "st,spear320-evb", "st,spear320";
 	#address-cells = <1>;
 	#size-cells = <1>;

@@ -26,7 +26,7 @@
 	ahb {
 		pinmux@b3000000 {
-			st,pinmux-mode = <3>;
+			st,pinmux-mode = <4>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&state_default>;
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index 089f0a42c50..a3c36e47d7e 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -181,6 +181,7 @@
 			timer@f0000000 {
 				compatible = "st,spear-timer";
 				reg = <0xf0000000 0x400>;
+				interrupt-parent = <&vic0>;
 				interrupts = <16>;
 			};
 		};
diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index 0f41bd1c47c..66db5f13af8 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -87,7 +87,7 @@ void __init spear3xx_map_io(void)

 static void __init spear3xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;

 	spear3xx_clk_init();
diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c
index 2e2e3596583..9af67d003c6 100644
--- a/arch/arm/mach-spear6xx/spear6xx.c
+++ b/arch/arm/mach-spear6xx/spear6xx.c
@@ -423,7 +423,7 @@ void __init spear6xx_map_io(void)

 static void __init spear6xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;

 	spear6xx_clk_init();
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4044abcf6f9..655878bcc96 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1091,7 +1091,7 @@ error:
 	while (--i)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
@@ -1106,7 +1106,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
 	for (i = 0; i < count; i++)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index f7264621e58..149fbefc1a4 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,9 +180,7 @@ void __cpuinit start_secondary(void)

 	notify_cpu_starting(cpu);

-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();

 	local_irq_enable();
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1113b8aba07..963d2db53bf 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -382,7 +382,6 @@ smp_callin (void)
 	set_numa_node(cpu_to_node_map[cpuid]);
 	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));

-	ipi_call_lock_irq();
 	spin_lock(&vector_lock);
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
@@ -390,7 +389,6 @@ smp_callin (void)
 	set_cpu_online(cpuid, true);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
-	ipi_call_unlock_irq();

 	smp_setup_percpu_timer();
diff --git a/arch/m32r/boot/compressed/Makefile b/arch/m32r/boot/compressed/Makefile
index 177716b1d61..01729c2979b 100644
--- a/arch/m32r/boot/compressed/Makefile
+++ b/arch/m32r/boot/compressed/Makefile
@@ -43,9 +43,9 @@ endif

 OBJCOPYFLAGS += -R .empty_zero_page

-suffix_$(CONFIG_KERNEL_GZIP)	= gz
-suffix_$(CONFIG_KERNEL_BZIP2)	= bz2
-suffix_$(CONFIG_KERNEL_LZMA)	= lzma
+suffix-$(CONFIG_KERNEL_GZIP)	= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	= lzma

 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
 	$(call if_changed,ld)
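The two dma-mapping.c hunks above fix an off-by-one in an alloc/free pairing: an array of exactly PAGE_SIZE bytes is allocated with kzalloc() but was then handed to vfree(). A minimal sketch of the underlying pattern (the helper names are hypothetical; the point is that the free path must mirror the allocation test exactly):

```c
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void **alloc_page_array(size_t count)
{
	size_t array_size = count * sizeof(void *);

	if (array_size <= PAGE_SIZE)
		return kzalloc(array_size, GFP_KERNEL);
	return vzalloc(array_size);
}

static void free_page_array(void **pages, size_t count)
{
	size_t array_size = count * sizeof(void *);

	/* Must mirror alloc_page_array(): with "<" instead of "<=",
	 * an array of exactly PAGE_SIZE bytes would be kzalloc()ed
	 * but vfree()d, corrupting the allocator state. */
	if (array_size <= PAGE_SIZE)
		kfree(pages);
	else
		vfree(pages);
}
```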
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 370d6088197..28a09529f20 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -28,7 +28,7 @@ static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;

 #ifdef CONFIG_KERNEL_BZIP2
-static void *memset(void *s, int c, size_t n)
+void *memset(void *s, int c, size_t n)
 {
 	char *ss = s;
@@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n)
 #endif

 #ifdef CONFIG_KERNEL_GZIP
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	char *d = dest;
+	const char *s = src;
+
+	while (n--)
+		*d++ = *s++;
+
+	return dest;
+}
+
 #define BOOT_HEAP_SIZE             0x10000
 #include "../../../../lib/decompress_inflate.c"
 #endif
diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index 527527584dd..4313aa62b51 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -113,9 +113,6 @@ struct pt_regs {

 #define PTRACE_OLDSETOPTIONS	21

-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD	0x00000001
-
 #ifdef __KERNEL__

 #include <asm/m32r.h>		/* M32R_PSW_BSM, M32R_PSW_BPM */
diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index cf7829a6155..c689b828dfe 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -79,11 +79,6 @@ static __inline__ int cpu_number_map(int cpu)
 	return cpu;
 }

-static __inline__ unsigned int num_booting_cpus(void)
-{
-	return cpumask_weight(&cpu_callout_map);
-}
-
 extern void smp_send_timer(void);
 extern unsigned long send_IPI_mask_phys(const cpumask_t*, int, int);
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 4c03361537a..51f5e9aa490 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child)

 	if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
 	    != sizeof(insn))
-		return -EIO;
+		return;

 	compute_next_pc(insn, pc, &next_pc, child);
 	if (next_pc & 0x80000000)
-		return -EIO;
+		return;

 	if (embed_debug_trap(child, next_pc))
-		return -EIO;
+		return;

 	invalidate_cache();
-	return 0;
 }

 void user_disable_single_step(struct task_struct *child)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index f3fb2c029cf..d0f60b97bbc 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 			case -ERESTARTNOINTR:
 				regs->r0 = regs->orig_r0;
 				if (prev_insn(regs) < 0)
-					return -EFAULT;
+					return;
 		}
 	}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09ab87ee6fe..b3e10fdd389 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -288,6 +288,7 @@ config MIPS_MALTA
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_HAS_CPU_MIPS64_R1
+	select SYS_HAS_CPU_MIPS64_R2
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_CPU_RM7000
 	select SYS_HAS_EARLY_PRINTK
@@ -1423,6 +1424,7 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
+	select ARCH_SPARSEMEM_ENABLE
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 6210b8d8410..b311be45a72 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -21,6 +21,7 @@ config BCM47XX_BCMA
 	select BCMA
 	select BCMA_HOST_SOC
 	select BCMA_DRIVER_MIPS
+	select BCMA_HOST_PCI if PCI
 	select BCMA_DRIVER_PCI_HOSTMODE if PCI
 	default y
 	help
diff --git a/arch/mips/bcm63xx/dev-pcmcia.c b/arch/mips/bcm63xx/dev-pcmcia.c
index de4d917fd54..a551bab5ecb 100644
--- a/arch/mips/bcm63xx/dev-pcmcia.c
+++ b/arch/mips/bcm63xx/dev-pcmcia.c
@@ -79,11 +79,11 @@ static int __init config_pcmcia_cs(unsigned int cs,
 	return ret;
 }

-static const __initdata struct {
+static const struct {
 	unsigned int	cs;
 	unsigned int	base;
 	unsigned int	size;
-} pcmcia_cs[3] = {
+} pcmcia_cs[3] __initconst = {
 	{
 		.cs	= MPI_CS_PCMCIA_COMMON,
 		.base	= BCM_PCMCIA_COMMON_BASE_PA,
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index f9e275a50d9..2f4f6d5e05b 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -82,10 +82,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 	help
 	  Lock the kernel's implementation of memcpy() into L2.

-config ARCH_SPARSEMEM_ENABLE
-	def_bool y
-	select SPARSEMEM_STATIC
-
 config IOMMU_HELPER
 	bool
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4b93048044e..ee1fb9f7f51 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -185,7 +185,6 @@ static void __cpuinit octeon_init_secondary(void)
 	octeon_init_cvmcount();

 	octeon_irq_setup_secondary();
-	raw_local_irq_enable();
 }

 /**
@@ -233,6 +232,7 @@ static void octeon_smp_finish(void)

 	/* to generate the first CPU timer interrupt */
 	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+	local_irq_enable();
 }

 /**
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 2e1ad4c652b..82ad35ce2b4 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -17,7 +17,6 @@
 #include <linux/irqflags.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
-#include <asm/bug.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/cpu-features.h>
 #include <asm/sgidefs.h>
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 285a41fa0b1..eee10dc07ac 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_CMPXCHG_H
 #define __ASM_CMPXCHG_H

+#include <linux/bug.h>
 #include <linux/irqflags.h>
 #include <asm/war.h>
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index f9fa2a479dd..95e40c1e8ed 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -94,6 +94,7 @@
 #define PRID_IMP_24KE		0x9600
 #define PRID_IMP_74K		0x9700
 #define PRID_IMP_1004K		0x9900
+#define PRID_IMP_M14KC		0x9c00

 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -260,12 +261,12 @@ enum cpu_type_enum {
 	 */
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
-	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC,
+	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_M14KC,

 	/*
 	 * MIPS64 class processors
 	 */
-	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
+	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
 	CPU_XLR, CPU_XLP,
@@ -288,7 +289,7 @@ enum cpu_type_enum {
 #define MIPS_CPU_ISA_M64R2	0x00000100

 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_I | MIPS_CPU_ISA_II | \
-	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 )
+	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2)

 #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \
 	MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)
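The dev-pcmcia.c hunk above swaps `const ... __initdata` for `__initconst`. The convention, sketched below with invented names (only the annotations themselves are the kernel's): read-only init data takes __initconst and read-write init data takes __initdata; combining `const` with __initdata asks the compiler to place a const object in a writable section, which some toolchains reject with a section type conflict.

```c
#include <linux/init.h>

static const int demo_pci_clocks[] __initconst = { 33, 20, 25, 30 };
static int demo_boot_flags __initdata;

static int __init demo_setup(void)
{
	/* Both objects are discarded once boot-time init completes. */
	demo_boot_flags = demo_pci_clocks[0];
	return 0;
}
```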
diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h
index 86548da650e..991b659e254 100644
--- a/arch/mips/include/asm/gic.h
+++ b/arch/mips/include/asm/gic.h
@@ -206,7 +206,7 @@
 #define GIC_VPE_EIC_SHADOW_SET_BASE	0x0100
 #define GIC_VPE_EIC_SS(intr) \
-	(GIC_EIC_SHADOW_SET_BASE + (4 * intr))
+	(GIC_VPE_EIC_SHADOW_SET_BASE + (4 * intr))

 #define GIC_VPE_EIC_VEC_BASE		0x0800
 #define GIC_VPE_EIC_VEC(intr) \
@@ -330,6 +330,17 @@ struct gic_intr_map {
 #define GIC_FLAG_TRANSPARENT   0x02
 };

+/*
+ * This is only used in EIC mode. This helps to figure out which
+ * shared interrupts we need to process when we get a vector interrupt.
+ */
+#define GIC_MAX_SHARED_INTR  0x5
+struct gic_shared_intr_map {
+	unsigned int num_shared_intr;
+	unsigned int intr_list[GIC_MAX_SHARED_INTR];
+	unsigned int local_intr_mask;
+};
+
 extern void gic_init(unsigned long gic_base_addr,
 	unsigned long gic_addrspace_size, struct gic_intr_map *intrmap,
 	unsigned int intrmap_size, unsigned int irqbase);
@@ -338,5 +349,7 @@ extern unsigned int gic_get_int(void);
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
+extern void gic_bind_eic_interrupt(int irq, int set);
+extern unsigned int gic_get_timer_pending(void);

 #endif /* _ASM_GICREGS_H */
diff --git a/arch/mips/include/asm/inst.h b/arch/mips/include/asm/inst.h
index 7ebfc392e58..ab84064283d 100644
--- a/arch/mips/include/asm/inst.h
+++ b/arch/mips/include/asm/inst.h
@@ -251,7 +251,7 @@ struct f_format {	/* FPU register format */
 	unsigned int func : 6;
 };

-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int opcode : 6;
 	unsigned int fr : 5;
 	unsigned int ft : 5;
@@ -324,7 +324,7 @@ struct f_format {	/* FPU register format */
 	unsigned int opcode : 6;
 };

-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int fmt : 2;
 	unsigned int func : 4;
 	unsigned int fd : 5;
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a58f22998a8..29d9c23c20c 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>

 #include <asm/addrspace.h>
+#include <asm/bug.h>
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index fb698dc09bc..78dbb8a86da 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -136,6 +136,7 @@ extern void free_irqno(unsigned int irq);
  * IE7.  Since R2 their number has to be read from the c0_intctl register.
  */
 #define CP0_LEGACY_COMPARE_IRQ 7
+#define CP0_LEGACY_PERFCNT_IRQ 7

 extern int cp0_compare_irq;
 extern int cp0_compare_irq_shift;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 94d4faad29a..fdcd78ca1b0 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -99,7 +99,7 @@
 #define CKCTL_6368_USBH_CLK_EN		(1 << 15)
 #define CKCTL_6368_DISABLE_GLESS_EN	(1 << 16)
 #define CKCTL_6368_NAND_CLK_EN		(1 << 17)
-#define CKCTL_6368_IPSEC_CLK_EN		(1 << 17)
+#define CKCTL_6368_IPSEC_CLK_EN		(1 << 18)

 #define CKCTL_6368_ALL_SAFE_EN		(CKCTL_6368_SWPKT_USB_EN |	\
 					CKCTL_6368_SWPKT_SAR_EN |	\
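The bcm63xx_regs.h hunk fixes a copy-paste duplicate where the NAND and IPSEC clock-enable defines both read (1 << 17), so gating one clock silently gated the other. For hand-maintained register maps like this, a compile-time assertion can catch the duplicate early; a hypothetical sketch (BUILD_BUG_ON is the stock kernel macro, the placement in an init function is illustrative):

```c
#include <linux/bug.h>
#include <linux/init.h>

static void __init demo_check_clk_bits(void)
{
	/* Fails the build if the two enable bits ever collide again. */
	BUILD_BUG_ON(CKCTL_6368_NAND_CLK_EN == CKCTL_6368_IPSEC_CLK_EN);
}
```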
diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h
index d11aa02a956..5447d9fc421 100644
--- a/arch/mips/include/asm/mips-boards/maltaint.h
+++ b/arch/mips/include/asm/mips-boards/maltaint.h
@@ -86,6 +86,16 @@
 #define GIC_CPU_INT4		4 /* .			*/
 #define GIC_CPU_INT5		5 /* Core Interrupt 5   */

+/* MALTA GIC local interrupts */
+#define GIC_INT_TMR             (GIC_CPU_INT5)
+#define GIC_INT_PERFCTR         (GIC_CPU_INT5)
+
+/* GIC constants */
+/* Add 2 to convert non-eic hw int # to eic vector # */
+#define GIC_CPU_TO_VEC_OFFSET   (2)
+/* If we map an intr to pin X, GIC will actually generate vector X+1 */
+#define GIC_PIN_TO_VEC_OFFSET   (1)
+
 #define GIC_EXT_INTR(x)		x

 /* External Interrupts used for IPI */
diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index c9420aa97e3..e71ff4c317f 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h
@@ -48,7 +48,7 @@
 #define CP0_VPECONF0		$1, 2
 #define CP0_VPECONF1		$1, 3
 #define CP0_YQMASK		$1, 4
-#define CP0_VPESCHEDULE	$1, 5
+#define CP0_VPESCHEDULE		$1, 5
 #define CP0_VPESCHEFBK		$1, 6
 #define CP0_TCSTATUS		$2, 1
 #define CP0_TCBIND		$2, 2
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 5d33621b565..4f8ddba8c36 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -22,7 +22,7 @@ struct task_struct;
  * switch_to(n) should switch tasks to task nr n, first
  * checking that n isn't the current task, in which case it does nothing.
  */
-extern asmlinkage void *resume(void *last, void *next, void *next_ti);
+extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu);

 extern unsigned int ll_bit;
 extern struct task_struct *ll_task;
@@ -66,11 +66,13 @@ do {									\

 #define switch_to(prev, next, last)					\
 do {									\
+	u32 __usedfpu;							\
 	__mips_mt_fpaff_switch_to(prev);				\
 	if (cpu_has_dsp)						\
 		__save_dsp(prev);					\
 	__clear_software_ll_bit();					\
-	(last) = resume(prev, next, task_thread_info(next));		\
+	__usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU);	\
+	(last) = resume(prev, next, task_thread_info(next), __usedfpu);	\
 } while (0)

 #define finish_arch_switch(prev)					\
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index e2eca7d1059..ca97e0ecb64 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -60,6 +60,8 @@ struct thread_info {
 register struct thread_info *__current_thread_info __asm__("$28");
 #define current_thread_info()  __current_thread_info

+#endif /* !__ASSEMBLY__ */
+
 /* thread information allocation */
 #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
 #define THREAD_SIZE_ORDER (1)
@@ -85,8 +87,6 @@ register struct thread_info *__current_thread_info __asm__("$28");

 #define STACK_WARN	(THREAD_SIZE / 8)

-#endif /* !__ASSEMBLY__ */
-
 #define PREEMPT_ACTIVE		0x10000000

 /*
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6ae7ce4ac63..f4630e1082a 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -4,7 +4,7 @@
  * Copyright (C) xxxx  the Anonymous
  * Copyright (C) 1994 - 2006 Ralf Baechle
  * Copyright (C) 2003, 2004  Maciej W. Rozycki
- * Copyright (C) 2001, 2004  MIPS Inc.
+ * Copyright (C) 2001, 2004, 2011, 2012  MIPS Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -199,6 +199,7 @@ void __init check_wait(void)
 		cpu_wait = rm7k_wait_irqoff;
 		break;

+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_1004K:
@@ -810,6 +811,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_5KC;
 		__cpu_name[cpu] = "MIPS 5Kc";
 		break;
+	case PRID_IMP_5KE:
+		c->cputype = CPU_5KE;
+		__cpu_name[cpu] = "MIPS 5KE";
+		break;
 	case PRID_IMP_20KC:
 		c->cputype = CPU_20KC;
 		__cpu_name[cpu] = "MIPS 20Kc";
@@ -831,6 +836,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_74K;
 		__cpu_name[cpu] = "MIPS 74Kc";
 		break;
+	case PRID_IMP_M14KC:
+		c->cputype = CPU_M14KC;
+		__cpu_name[cpu] = "MIPS M14Kc";
+		break;
 	case PRID_IMP_1004K:
 		c->cputype = CPU_1004K;
 		__cpu_name[cpu] = "MIPS 1004Kc";
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 57ba13edb03..3fc1691110d 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05 by Ralf Baechle
+ * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05, 12 by Ralf Baechle
 * Copyright (C) 1999, 2000, 01 Silicon Graphics, Inc.
  */
 #include <linux/interrupt.h>
@@ -35,6 +35,12 @@ EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(kernel_thread);

 /*
+ * Functions that operate on entire pages.  Mostly used by memory management.
+ */
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+/*
  * Userspace access stuff.
  */
 EXPORT_SYMBOL(__copy_user);
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index ce89c806170..0441f54b2a6 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -31,7 +31,7 @@

 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	7
 	LEAF(resume)
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index f29099b104c..eb5e394a465 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -162,11 +162,6 @@ static unsigned int counters_total_to_per_cpu(unsigned int counters)
 	return counters >> vpe_shift();
 }

-static unsigned int counters_per_cpu_to_total(unsigned int counters)
-{
-	return counters << vpe_shift();
-}
-
 #else /* !CONFIG_MIPS_MT_SMP */

 #define vpe_id()	0
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 293898391e6..9c51be5a163 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -43,7 +43,7 @@

 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti) )
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 LEAF(resume)
 	mfc0	t1, CP0_STATUS
@@ -51,18 +51,9 @@ LEAF(resume)
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)

-	/*
-	 * check if we need to save FPU registers
-	 */
-	lw	t3, TASK_THREAD_INFO(a0)
-	lw	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
+	beqz	a3, 1f

-	and	t0, t0, t1
-	sw	t0, TI_FLAGS(t3)
+	PTR_L	t3, TASK_THREAD_INFO(a0)

 	/*
 	 * clear saved user stack CU1 bit
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 9414f935446..42d2a393842 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -41,7 +41,7 @@

 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	5
 	LEAF(resume)
@@ -53,16 +53,10 @@
 	/*
 	 * check if we need to save FPU registers
 	 */
-	PTR_L	t3, TASK_THREAD_INFO(a0)
-	LONG_L	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
-	and	t0, t0, t1
-	LONG_S	t0, TI_FLAGS(t3)
+	beqz    a3, 1f
+	PTR_L	t3, TASK_THREAD_INFO(a0)

 	/*
 	 * clear saved user stack CU1 bit
 	 */
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 3046e298600..8e393b8443f 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -15,7 +15,6 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
@@ -197,13 +196,6 @@ static void bmips_init_secondary(void)

 	write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
 #endif
-
-	/* make sure there won't be a timer interrupt for a little while */
-	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
-
-	irq_enable_hazard();
-	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
-	irq_enable_hazard();
 }

 /*
@@ -212,6 +204,13 @@ static void bmips_init_secondary(void)
 static void bmips_smp_finish(void)
 {
 	pr_info("SMP: CPU%d is running\n", smp_processor_id());
+
+	/* make sure there won't be a timer interrupt for a little while */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+
+	irq_enable_hazard();
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
+	irq_enable_hazard();
 }

 /*
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 48650c81804..1268392f1d2 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -122,13 +122,21 @@ asmlinkage __cpuinit void start_secondary(void)

 	notify_cpu_starting(cpu);

-	mp_ops->smp_finish();
+	set_cpu_online(cpu, true);
+
 	set_cpu_sibling_map(cpu);

 	cpu_set(cpu, cpu_callin_map);

 	synchronise_count_slave();

+	/*
+	 * irq will be enabled in ->smp_finish(), enabling it too early
+	 * is dangerous.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+	mp_ops->smp_finish();
+
 	cpu_idle();
 }
@@ -196,8 +204,6 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	while (!cpu_isset(cpu, cpu_callin_map))
 		udelay(100);

-	set_cpu_online(cpu, true);
-
 	return 0;
 }
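The smp.c hunk above moves ->smp_finish() to the end of secondary-CPU bring-up and documents the ordering contract with an assertion: interrupts must still be off when smp_finish() is entered, because smp_finish() itself enables them. The same assertion style, reduced to a hedged standalone sketch (the helper is invented):

```c
#include <linux/bug.h>
#include <linux/irqflags.h>

static void demo_finish_cpu_bringup(void (*smp_finish)(void))
{
	/* Warn exactly once if the invariant is ever violated, rather
	 * than silently running with interrupts enabled too early. */
	WARN_ON_ONCE(!irqs_disabled());
	smp_finish();	/* enables interrupts itself */
}
```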
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index f5dd38f1d01..15b5f3cfd20 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -322,7 +322,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)

 /*
  * Common setup before any secondaries are started
- * Make sure all CPU's are in a sensible state before we boot any of the
+ * Make sure all CPUs are in a sensible state before we boot any of the
  * secondaries.
  *
  * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly
@@ -340,12 +340,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	/*
 	 * TCContext gets an offset from the base of the IPIQ array
 	 * to be used in low-level code to detect the presence of
-	 * an active IPI queue
+	 * an active IPI queue.
 	 */
 	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
-	/* In general, all TCs should have the same cpu_data indications */
+	/* In general, all TCs should have the same cpu_data indications. */
 	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
 	/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
 	if (cpu_data[0].cputype == CPU_34K ||
@@ -358,8 +358,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 }

 /*
- * Tweak to get Count registes in as close a sync as possible.
- * Value seems good for 34K-class cores.
+ * Tweak to get Count registes in as close a sync as possible.  The
+ * value seems good for 34K-class cores.
  */
 #define CP0_SKEW 8
@@ -615,7 +615,6 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)

 void smtc_init_secondary(void)
 {
-	local_irq_enable();
 }

 void smtc_smp_finish(void)
@@ -631,6 +630,8 @@ void smtc_smp_finish(void)
 	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
 		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);

+	local_irq_enable();
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index 99f913c8d7a..842d55e411f 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -111,7 +111,6 @@ void __cpuinit synchronise_count_master(void)
 void __cpuinit synchronise_count_slave(void)
 {
 	int i;
-	unsigned long flags;
 	unsigned int initcount;
 	int ncpus;
@@ -123,8 +122,6 @@ void __cpuinit synchronise_count_slave(void)
 	return;
 #endif

-	local_irq_save(flags);
-
 	/*
 	 * Not every cpu is online at the time this gets called,
 	 * so we first wait for the master to say everyone is ready
@@ -154,7 +151,5 @@ void __cpuinit synchronise_count_slave(void)
 	}
 	/* Arrange for an interrupt in a short while */
 	write_c0_compare(read_c0_count() + COUNTON);
-
-	local_irq_restore(flags);
 }
 #undef NR_LOOPS
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 2d0c2a277f5..c3c29354370 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -132,6 +132,9 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
 	unsigned long ra = regs->regs[31];
 	unsigned long pc = regs->cp0_epc;

+	if (!task)
+		task = current;
+
 	if (raw_show_trace || !__kernel_text_address(pc)) {
 		show_raw_backtrace(sp);
 		return;
@@ -1249,6 +1252,7 @@ static inline void parity_protection_init(void)
 		break;

 	case CPU_5KC:
+	case CPU_5KE:
 		write_c0_ecc(0x80000000);
 		back_to_back_c0_hazard();
 		/* Set the PE bit (bit 31) in the c0_errctl register. */
@@ -1498,6 +1502,7 @@ extern void flush_tlb_handlers(void);
  * Timer interrupt
  */
 int cp0_compare_irq;
+EXPORT_SYMBOL_GPL(cp0_compare_irq);
 int cp0_compare_irq_shift;

 /*
@@ -1597,7 +1602,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu)
 			cp0_perfcount_irq = -1;
 	} else {
 		cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ;
-		cp0_compare_irq_shift = cp0_compare_irq;
+		cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ;
 		cp0_perfcount_irq = -1;
 	}
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 924da5eb703..df243a64f43 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm-generic/vmlinux.lds.h>

 #undef mips
@@ -72,7 +73,7 @@ SECTIONS
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */

-		INIT_TASK_DATA(PAGE_SIZE)
+		INIT_TASK_DATA(THREAD_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index 4aa20280613..fd6203f14f1 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,8 +3,8 @@
 #

 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   gup.o init.o mmap.o page.o tlbex.o \
-				   tlbex-fault.o uasm.o
+				   gup.o init.o mmap.o page.o page-funcs.o \
+				   tlbex.o tlbex-fault.o uasm.o

 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 5109be96d98..f092c265dc6 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -977,7 +977,7 @@ static void __cpuinit probe_pcache(void)
 			c->icache.linesz = 2 << lsize;
 		else
 			c->icache.linesz = lsize;
-		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);
 		c->icache.ways = 1 + ((config1 >> 16) & 7);

 		icache_size = c->icache.sets *
@@ -997,7 +997,7 @@ static void __cpuinit probe_pcache(void)
 			c->dcache.linesz = 2 << lsize;
 		else
 			c->dcache.linesz= lsize;
-		c->dcache.sets = 64 << ((config1 >> 13) & 7);
+		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);
 		c->dcache.ways = 1 + ((config1 >> 7) & 7);

 		dcache_size = c->dcache.sets *
@@ -1051,6 +1051,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_R14000:
 		break;

+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_74K:
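The c-r4k.c hunks change how the Config1 IS/DS (sets-per-way) fields are decoded: field values 0-6 encode 64 << n sets, but the value 7 encodes 32 sets, which the old `64 << field` expression turned into a bogus 8192. A small user-space sketch (hypothetical, for illustration only) comparing the two decodings:

```c
#include <stdio.h>

/* Decode a 3-bit MIPS32 Config1 IS/DS field into sets per way. */
static unsigned old_sets(unsigned field) { return 64u << (field & 7); }
static unsigned new_sets(unsigned field) { return 32u << ((field + 1) & 7); }

int main(void)
{
	for (unsigned f = 0; f <= 7; f++)
		printf("field=%u  old=%4u  new=%4u\n",
		       f, old_sets(f), new_sets(f));
	/* Fields 0..6 agree (64..4096); for field 7 the old formula
	 * prints 8192 while the new one prints the intended 32,
	 * matching small-cache cores such as the M14Kc. */
	return 0;
}
```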
diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S
new file mode 100644
index 00000000000..48a6b38ff13
--- /dev/null
+++ b/arch/mips/mm/page-funcs.S
@@ -0,0 +1,50 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Micro-assembler generated clear_page/copy_page functions.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#define cpu_clear_page_function_name	clear_page_cpu
+#define cpu_copy_page_function_name	copy_page_cpu
+#else
+#define cpu_clear_page_function_name	clear_page
+#define cpu_copy_page_function_name	copy_page
+#endif
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x058 bytes
+ * R4600 v1.7:				0x05c bytes
+ * R4600 v2.0:				0x060 bytes
+ * With prefetching, 16 word strides	0x120 bytes
+ */
+EXPORT(__clear_page_start)
+LEAF(cpu_clear_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 288
+END(cpu_clear_page_function_name)
+EXPORT(__clear_page_end)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 word strides	0x540 bytes
+ */
+EXPORT(__copy_page_start)
+LEAF(cpu_copy_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 1344
+END(cpu_copy_page_function_name)
+EXPORT(__copy_page_end)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cc0b626858b..98f530e1821 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -6,6 +6,7 @@
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
+ * Copyright (C) 2012  MIPS Technologies, Inc.
 */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -71,45 +72,6 @@ static struct uasm_reloc __cpuinitdata relocs[5];
 #define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
 #define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)

-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x058 bytes
- * R4600 v1.7:				0x05c bytes
- * R4600 v2.0:				0x060 bytes
- * With prefetching, 16 word strides	0x120 bytes
- */
-
-static u32 clear_page_array[0x120 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
-#else
-void clear_page(void *page) __attribute__((alias("clear_page_array")));
-#endif
-
-EXPORT_SYMBOL(clear_page);
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x11c bytes
- * R4600 v1.7:				0x080 bytes
- * R4600 v2.0:				0x07c bytes
- * With prefetching, 16 word strides	0x540 bytes
- */
-static u32 copy_page_array[0x540 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void
-copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
-#else
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
-#endif
-
-EXPORT_SYMBOL(copy_page);
-
-
 static int pref_bias_clear_store __cpuinitdata;
 static int pref_bias_copy_load __cpuinitdata;
 static int pref_bias_copy_store __cpuinitdata;
@@ -282,10 +244,15 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
 		}
 }

+extern u32 __clear_page_start;
+extern u32 __clear_page_end;
+extern u32 __copy_page_start;
+extern u32 __copy_page_end;
+
 void __cpuinit build_clear_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&clear_page_array;
+	u32 *buf = &__clear_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -356,17 +323,17 @@ void __cpuinit build_clear_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);

-	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
+	BUG_ON(buf > &__clear_page_end);

 	uasm_resolve_relocs(relocs, labels);

 	pr_debug("Synthesized clear page handler (%u instructions).\n",
-		 (u32)(buf - clear_page_array));
+		 (u32)(buf - &__clear_page_start));

 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - clear_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
+	for (i = 0; i < (buf - &__clear_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }
@@ -427,7 +394,7 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 void __cpuinit build_copy_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&copy_page_array;
+	u32 *buf = &__copy_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -595,21 +562,23 @@ void __cpuinit build_copy_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);

-	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
+	BUG_ON(buf > &__copy_page_end);

 	uasm_resolve_relocs(relocs, labels);

 	pr_debug("Synthesized copy page handler (%u instructions).\n",
-		 (u32)(buf - copy_page_array));
+		 (u32)(buf - &__copy_page_start));

 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - copy_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
+	for (i = 0; i < (buf - &__copy_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }

 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+extern void clear_page_cpu(void *page);
+extern void copy_page_cpu(void *to, void *from);

 /*
  * Pad descriptors to cacheline, since each is exclusively owned by a
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 0bc485b3cd6..03eb0ef9158 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -9,6 +9,7 @@
 * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
 * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2008, 2009 Cavium Networks, Inc.
+ * Copyright (C) 2011  MIPS Technologies, Inc.
 *
 * ... and the days got worse and worse and now you see
 * I've gone completly out of my mind.
@@ -494,6 +495,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_R14000:
 	case CPU_4KC:
 	case CPU_4KEC:
+	case CPU_M14KC:
 	case CPU_SB1:
 	case CPU_SB1A:
 	case CPU_4KSC:
diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c
index bf80921f2f5..284dea54faf 100644
--- a/arch/mips/mti-malta/malta-pci.c
+++ b/arch/mips/mti-malta/malta-pci.c
@@ -241,8 +241,9 @@ void __init mips_pcibios_init(void)
 		return;
 	}

-	if (controller->io_resource->start < 0x00001000UL)	/* FIXME */
-		controller->io_resource->start = 0x00001000UL;
+	/* Change start address to avoid conflicts with ACPI and SMB devices */
+	if (controller->io_resource->start < 0x00002000UL)
+		controller->io_resource->start = 0x00002000UL;

 	iomem_resource.end &= 0xfffffffffULL;			/* 64 GB */
 	ioport_resource.end = controller->io_resource->end;
@@ -253,7 +254,7 @@ void __init mips_pcibios_init(void)
 }

 /* Enable PCI 2.1 compatibility in PIIX4 */
-static void __init quirk_dlcsetup(struct pci_dev *dev)
+static void __devinit quirk_dlcsetup(struct pci_dev *dev)
 {
 	u8 odlc, ndlc;

 	(void) pci_read_config_byte(dev, 0x82, &odlc);
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index b7f37d4982f..2e28f653f66 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -111,7 +111,7 @@ static void __init pci_clock_check(void)
 	unsigned int __iomem *jmpr_p =
 		(unsigned int *) ioremap(MALTA_JMPRS_REG, sizeof(unsigned int));
 	int jmpr = (__raw_readl(jmpr_p) >> 2) & 0x07;
-	static const int pciclocks[] __initdata = {
+	static const int pciclocks[] __initconst = {
 		33, 20, 25, 30, 12, 16, 37, 10
 	};
 	int pciclock = pciclocks[jmpr];
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index acb677a1227..b3df7c2aad1 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -82,8 +82,10 @@ void __init prom_free_prom_memory(void)

 void xlp_mmu_init(void)
 {
+	/* enable extended TLB and Large Fixed TLB */
 	write_c0_config6(read_c0_config6() | 0x24);
-	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
+	/* set page mask of Fixed TLB in config7 */
 	write_c0_config7(PM_DEFAULT_MASK >>
 		(13 + (ffz(PM_DEFAULT_MASK >> 13) / 2)));
 }
@@ -100,6 +102,10 @@ void __init prom_init(void)
 	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
 #ifdef CONFIG_SMP
 	nlm_wakeup_secondary_cpus(0xffffffff);
+
+	/* update TLB size after waking up threads */
+	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
 	register_smp_ops(&nlm_smp_ops);
 #endif
 }
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index d1f2d4c52d4..b6e378211a2 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -78,6 +78,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)

 	switch (current_cpu_type()) {
 	case CPU_5KC:
+	case CPU_M14KC:
 	case CPU_20KC:
 	case CPU_24K:
 	case CPU_25KF:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index baba3bcaa3c..4d80a856048 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -322,6 +322,10 @@ static int __init mipsxx_init(void)
 	op_model_mipsxx_ops.num_counters = counters;

 	switch (current_cpu_type()) {
+	case CPU_M14KC:
+		op_model_mipsxx_ops.cpu_type = "mips/M14Kc";
+		break;
+
 	case CPU_20KC:
 		op_model_mipsxx_ops.cpu_type = "mips/20K";
 		break;
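The malta-pci.c hunk above, and the fixup-*.c diffs that follow, retag PCI quirks from __init to __devinit: with hotplug, a quirk can fire long after boot-time init memory has been freed, so the handler must survive past init. A hedged sketch of the convention (the vendor/device pair, function name, and register poke are placeholders; DECLARE_PCI_FIXUP_HEADER and __devinit are the kernel's own machinery of this era):

```c
#include <linux/init.h>
#include <linux/pci.h>

/* Must be __devinit, not __init: a hot-plugged device can trigger
 * this fixup after .init.text has been discarded. */
static void __devinit demo_bridge_fixup(struct pci_dev *dev)
{
	pci_write_config_byte(dev, 0x40, 0xff);	/* illustrative poke */
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, demo_bridge_fixup);
```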
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index d5d4c018fb0..0857ab8c391 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -48,7 +48,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }

-static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
@@ -60,7 +60,7 @@ static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, 0xe4, 1 << 5);
 }

-static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char c;
@@ -135,7 +135,7 @@ static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: ISA bridge done\n");
 }

-static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 {
 	printk(KERN_INFO"via686b fix: IDE\n");
@@ -168,19 +168,19 @@ static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: IDE done\n");
 }

-static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func2_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10);
 }

-static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func3_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11);
 }

-static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func5_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 	unsigned char c;
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 4b9768d5d72..a7b917dcf60 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -96,21 +96,21 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 }

 /* CS5536 SPEC. fixup */
-static void __init loongson_cs5536_isa_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_isa_fixup(struct pci_dev *pdev)
 {
 	/* the uart1 and uart2 interrupt in PIC is enabled as default */
 	pci_write_config_dword(pdev, PCI_UART1_INT_REG, 1);
 	pci_write_config_dword(pdev, PCI_UART2_INT_REG, 1);
 }

-static void __init loongson_cs5536_ide_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ide_fixup(struct pci_dev *pdev)
 {
 	/* setting the mutex pin as IDE function */
 	pci_write_config_dword(pdev, PCI_IDE_CFG_REG,
 			       CS5536_IDE_FLASH_SIGNATURE);
 }

-static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_acc_fixup(struct pci_dev *pdev)
 {
 	/* enable the AUDIO interrupt in PIC  */
 	pci_write_config_dword(pdev, PCI_ACC_INT_REG, 1);

 	pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xc0);
 }

-static void __init loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
 {
 	/* enable the OHCI interrupt in PIC */
 	/* THE OHCI, EHCI, UDC, OTG are shared with interrupt in PIC */
 	pci_write_config_dword(pdev, PCI_OHCI_INT_REG, 1);
 }

-static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 {
 	u32 hi, lo;
@@ -137,7 +137,7 @@ static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, PCI_EHCI_FLADJ_REG, 0x2000);
 }

-static void __init loongson_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 0f48498bc23..70073c98ed3 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -49,10 +49,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }

-static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
-	static int piixirqmap[16] __initdata = {  /* PIIX PIRQC[A:D] irq mappings */
+	static int piixirqmap[16] __devinitdata = {  /* PIIX PIRQC[A:D] irq mappings */
 		0,  0, 	0,  3,
 		4,  5,  6,  7,
 		0,  9, 10, 11,
@@ -83,7 +83,7 @@ static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0,
 	 malta_piix_func0_fixup);

-static void __init malta_piix_func1_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func1_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index e08f49cb687..8e4f8288eca 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -22,13 +22,13 @@

 #include <asm/vr41xx/mpc30x.h>

-static const int internal_func_irqs[] __initdata = {
+static const int internal_func_irqs[] __initconst = {
 	VRC4173_CASCADE_IRQ,
 	VRC4173_AC97_IRQ,
 	VRC4173_USB_IRQ,
 };

-static const int irq_tab_mpc30x[] __initdata = {
+static const int irq_tab_mpc30x[] __initconst = {
  [12] = VRC4173_PCMCIA1_IRQ,
  [13] = VRC4173_PCMCIA2_IRQ,
  [29] = MQ200_IRQ,
diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index f0bb9146e6c..d02900a7291 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -15,7 +15,7 @@

 /*
 * Set the BCM1250, etc. PCI host bridge's TRDY timeout
PCI host bridge's TRDY timeout   * to the finite max.   */ -static void __init quirk_sb1250_pci(struct pci_dev *dev) +static void __devinit quirk_sb1250_pci(struct pci_dev *dev)  {  	pci_write_config_byte(dev, 0x40, 0xff);  } @@ -25,7 +25,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,  /*   * The BCM1250, etc. PCI/HT bridge reports as a host bridge.   */ -static void __init quirk_sb1250_ht(struct pci_dev *dev) +static void __devinit quirk_sb1250_ht(struct pci_dev *dev)  {  	dev->class = PCI_CLASS_BRIDGE_PCI << 8;  } @@ -35,7 +35,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT,  /*   * Set the SP1011 HT/PCI bridge's TRDY timeout to the finite max.   */ -static void __init quirk_sp1011(struct pci_dev *dev) +static void __devinit quirk_sp1011(struct pci_dev *dev)  {  	pci_write_config_byte(dev, 0x64, 0xff);  } diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c index a1e7e6d80c8..bc13e29d2bb 100644 --- a/arch/mips/pci/ops-tx4927.c +++ b/arch/mips/pci/ops-tx4927.c @@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id)  }  #ifdef CONFIG_TOSHIBA_FPCIB0 -static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)  {  	struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus); diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 0fbe4c0c170..fdc24440294 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -212,7 +212,7 @@ static inline void pci_enable_swapping(struct pci_dev *dev)  	bridge->b_widget.w_tflush;	/* Flush */  } -static void __init pci_fixup_ioc3(struct pci_dev *d) +static void __devinit pci_fixup_ioc3(struct pci_dev *d)  {  	pci_disable_swapping(d);  } diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 1644805a673..172af1cd586 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -41,6 +41,7 @@  #include <linux/irq.h>  #include <linux/irqdesc.h>  #include <linux/console.h> +#include <linux/pci_regs.h>  #include <asm/io.h> @@ -156,35 +157,55 @@ struct pci_controller nlm_pci_controller = {  	.io_offset      = 0x00000000UL,  }; +/* + * The top level PCIe links on the XLS PCIe controller appear as + * bridges. Given a device, this function finds which link it is + * on. + */ +static struct pci_dev *xls_get_pcie_link(const struct pci_dev *dev) +{ +	struct pci_bus *bus, *p; + +	/* Find the bridge on bus 0 */ +	bus = dev->bus; +	for (p = bus->parent; p && p->number != 0; p = p->parent) +		bus = p; + +	return p ? 
bus->self : NULL; +} +  static int get_irq_vector(const struct pci_dev *dev)  { +	struct pci_dev *lnk; +  	if (!nlm_chip_is_xls()) -		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/ +		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ */  	/*  	 * For XLS PCIe, there is an IRQ per Link, find out which  	 * link the device is on to assign interrupts -	*/ -	if (dev->bus->self == NULL) +	 */ +	lnk = xls_get_pcie_link(dev); +	if (lnk == NULL)  		return 0; -	switch	(dev->bus->self->devfn) { -	case 0x0: +	switch	(PCI_SLOT(lnk->devfn)) { +	case 0:  		return PIC_PCIE_LINK0_IRQ; -	case 0x8: +	case 1:  		return PIC_PCIE_LINK1_IRQ; -	case 0x10: +	case 2:  		if (nlm_chip_is_xls_b())  			return PIC_PCIE_XLSB0_LINK2_IRQ;  		else  			return PIC_PCIE_LINK2_IRQ; -	case 0x18: +	case 3:  		if (nlm_chip_is_xls_b())  			return PIC_PCIE_XLSB0_LINK3_IRQ;  		else  			return PIC_PCIE_LINK3_IRQ;  	} -	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn); +	WARN(1, "Unexpected devfn %d\n", lnk->devfn);  	return 0;  } @@ -202,7 +223,27 @@ void arch_teardown_msi_irq(unsigned int irq)  int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)  {  	struct msi_msg msg; +	struct pci_dev *lnk;  	int irq, ret; +	u16 val; + +	/* MSI not supported on XLR */ +	if (!nlm_chip_is_xls()) +		return 1; + +	/* +	 * Enable MSI on the XLS PCIe controller bridge which was disabled +	 * at enumeration, the bridge MSI capability is at 0x50 +	 */ +	lnk = xls_get_pcie_link(dev); +	if (lnk == NULL) +		return 1; + +	pci_read_config_word(lnk, 0x50 + PCI_MSI_FLAGS, &val); +	if ((val & PCI_MSI_FLAGS_ENABLE) == 0) { +		val |= PCI_MSI_FLAGS_ENABLE; +		pci_write_config_word(lnk, 0x50 + PCI_MSI_FLAGS, val); +	}  	irq = get_irq_vector(dev);  	if (irq <= 0) @@ -327,7 +368,7 @@ static int __init pcibios_init(void)  		}  	} else {  		/* XLR PCI controller ACK */ -		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack); +		irq_set_handler_data(PIC_PCIX_IRQ, xlr_pci_ack);  	}  	return 0; diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index b71fae23104..5edab2bc6fc 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -115,11 +115,11 @@ static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)   */  static void __cpuinit yos_init_secondary(void)  { -	set_c0_status(ST0_CO | ST0_IE | ST0_IM);  }  static void __cpuinit yos_smp_finish(void)  { +	set_c0_status(ST0_CO | ST0_IM | ST0_IE);  }  /* Hook for after all CPUs are online */ diff --git a/arch/mips/powertv/asic/asic-calliope.c b/arch/mips/powertv/asic/asic-calliope.c index 0a170e0ffea..7773f3d956b 100644 --- a/arch/mips/powertv/asic/asic-calliope.c +++ b/arch/mips/powertv/asic/asic-calliope.c @@ -28,7 +28,7 @@  #define CALLIOPE_ADDR(x)	(CALLIOPE_IO_BASE + (x)) -const struct register_map calliope_register_map __initdata = { +const struct register_map calliope_register_map __initconst = {  	.eic_slow0_strt_add = {.phys = CALLIOPE_ADDR(0x800000)},  	.eic_cfg_bits = {.phys = CALLIOPE_ADDR(0x800038)},  	.eic_ready_status = {.phys = CALLIOPE_ADDR(0x80004c)}, diff --git a/arch/mips/powertv/asic/asic-cronus.c b/arch/mips/powertv/asic/asic-cronus.c index bbc0c122be5..da076db7b7e 100644 --- a/arch/mips/powertv/asic/asic-cronus.c +++ b/arch/mips/powertv/asic/asic-cronus.c @@ -28,7 +28,7 @@  #define CRONUS_ADDR(x)	(CRONUS_IO_BASE + (x)) -const struct register_map cronus_register_map __initdata = { +const struct register_map cronus_register_map __initconst = {  	.eic_slow0_strt_add = {.phys = 
CRONUS_ADDR(0x000000)},  	.eic_cfg_bits = {.phys = CRONUS_ADDR(0x000038)},  	.eic_ready_status = {.phys = CRONUS_ADDR(0x00004C)}, diff --git a/arch/mips/powertv/asic/asic-gaia.c b/arch/mips/powertv/asic/asic-gaia.c index 91dda682752..47683b370e7 100644 --- a/arch/mips/powertv/asic/asic-gaia.c +++ b/arch/mips/powertv/asic/asic-gaia.c @@ -23,7 +23,7 @@  #include <linux/init.h>  #include <asm/mach-powertv/asic.h> -const struct register_map gaia_register_map __initdata = { +const struct register_map gaia_register_map __initconst = {  	.eic_slow0_strt_add = {.phys = GAIA_IO_BASE + 0x000000},  	.eic_cfg_bits = {.phys = GAIA_IO_BASE + 0x000038},  	.eic_ready_status = {.phys = GAIA_IO_BASE + 0x00004C}, diff --git a/arch/mips/powertv/asic/asic-zeus.c b/arch/mips/powertv/asic/asic-zeus.c index 4a05bb09647..6ff4b10f09d 100644 --- a/arch/mips/powertv/asic/asic-zeus.c +++ b/arch/mips/powertv/asic/asic-zeus.c @@ -28,7 +28,7 @@  #define ZEUS_ADDR(x)	(ZEUS_IO_BASE + (x)) -const struct register_map zeus_register_map __initdata = { +const struct register_map zeus_register_map __initconst = {  	.eic_slow0_strt_add = {.phys = ZEUS_ADDR(0x000000)},  	.eic_cfg_bits = {.phys = ZEUS_ADDR(0x000038)},  	.eic_ready_status = {.phys = ZEUS_ADDR(0x00004c)}, diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index 682efb0c108..64eb71b1528 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq)  	return err;  } -static void __init quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev)  {  	int irq;	/* PCI/ISA Bridge interrupt */  	u8 reg_64; diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 6ab0bee2a54..4d584ae29ae 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -459,10 +459,11 @@ static int handle_signal(int sig,  	else  		ret = setup_frame(sig, ka, oldset, regs);  	if (ret) -		return; +		return ret;  	signal_delivered(sig, info, ka, regs, -				 test_thread_flag(TIF_SINGLESTEP)); +			 test_thread_flag(TIF_SINGLESTEP)); +	return 0;  }  /* diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 090d35d3697..e62c223e4c4 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -876,9 +876,7 @@ static void __init smp_online(void)  	notify_cpu_starting(cpu); -	ipi_call_lock();  	set_cpu_online(cpu, true); -	ipi_call_unlock();  	local_irq_enable();  } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index a47828d31fe..6266730efd6 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -300,9 +300,7 @@ smp_cpu_init(int cpunum)  	notify_cpu_starting(cpunum); -	ipi_call_lock();  	set_cpu_online(cpunum, true); -	ipi_call_unlock();  	/* Initialise the idle task for this CPU */  	atomic_inc(&init_mm.mm_count); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb34322de..e1417c42155 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)  	if (system_state == SYSTEM_RUNNING)  		vdso_data->processorCount++;  #endif -	ipi_call_lock();  	notify_cpu_starting(cpu);  	set_cpu_online(cpu, true);  	/* Update sibling maps */ @@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)  		of_node_put(np);  	}  	of_node_put(l2_cache); -	ipi_call_unlock();  	local_irq_enable(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 15cca26ccb6..8dca9c248ac 
100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -717,9 +717,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)  	init_cpu_vtimer();  	pfault_init();  	notify_cpu_starting(smp_processor_id()); -	ipi_call_lock();  	set_cpu_online(smp_processor_id(), true); -	ipi_call_unlock();  	local_irq_enable();  	/* cpu_idle will call schedule for us */  	cpu_idle(); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f591598d92f..781bcb10b8b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -103,8 +103,6 @@ void __cpuinit smp_callin(void)  	if (cheetah_pcache_forced_on)  		cheetah_enable_pcache(); -	local_irq_enable(); -  	callin_flag = 1;  	__asm__ __volatile__("membar #Sync\n\t"  			     "flush  %%g6" : : : "memory"); @@ -124,9 +122,8 @@ void __cpuinit smp_callin(void)  	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))  		rmb(); -	ipi_call_lock_irq();  	set_cpu_online(cpuid, true); -	ipi_call_unlock_irq(); +	local_irq_enable();  	/* idle thread is expected to have preempt disabled */  	preempt_disable(); @@ -1308,9 +1305,7 @@ int __cpu_disable(void)  	mdelay(1);  	local_irq_disable(); -	ipi_call_lock();  	set_cpu_online(cpu, false); -	ipi_call_unlock();  	cpu_map_rebuild(); diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 84873fbe8f2..e686c5ac90b 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -198,17 +198,7 @@ void __cpuinit online_secondary(void)  	notify_cpu_starting(smp_processor_id()); -	/* -	 * We need to hold call_lock, so there is no inconsistency -	 * between the time smp_call_function() determines number of -	 * IPI recipients, and the time when the determination is made -	 * for which cpus receive the IPI. Holding this -	 * lock helps us to not include this cpu in a currently in progress -	 * smp_call_function(). -	 */ -	ipi_call_lock();  	set_cpu_online(smp_processor_id(), 1); -	ipi_call_unlock();  	__get_cpu_var(cpu_state) = CPU_ONLINE;  	/* Set up tile-specific state for this cpu. */ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f252143455..b0c5276861e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -49,6 +49,9 @@ else          KBUILD_AFLAGS += -m64          KBUILD_CFLAGS += -m64 +	# Use -mpreferred-stack-boundary=3 if supported. 
+	KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) +          # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)          cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)          cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 49331bedc15..70780689599 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end)  }  #endif	/* CONFIG_SMP */ +#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n" + +#define b_replacement(number)	"663"#number +#define e_replacement(number)	"664"#number + +#define alt_slen "662b-661b" +#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" + +#define ALTINSTR_ENTRY(feature, number)					      \ +	" .long 661b - .\n"				/* label           */ \ +	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \ +	" .word " __stringify(feature) "\n"		/* feature bit     */ \ +	" .byte " alt_slen "\n"				/* source len      */ \ +	" .byte " alt_rlen(number) "\n"			/* replacement len */ + +#define DISCARD_ENTRY(number)				/* rlen <= slen */    \ +	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" + +#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \ +	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" +  /* alternative assembly primitive: */  #define ALTERNATIVE(oldinstr, newinstr, feature)			\ -									\ -      "661:\n\t" oldinstr "\n662:\n"					\ -      ".section .altinstructions,\"a\"\n"				\ -      "	 .long 661b - .\n"			/* label           */	\ -      "	 .long 663f - .\n"			/* new instruction */	\ -      "	 .word " __stringify(feature) "\n"	/* feature bit     */	\ -      "	 .byte 662b-661b\n"			/* sourcelen       */	\ -      "	 .byte 664f-663f\n"			/* replacementlen  */	\ -      ".previous\n"							\ -      ".section .discard,\"aw\",@progbits\n"				\ -      "	 .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */	\ -      ".previous\n"							\ -      ".section .altinstr_replacement, \"ax\"\n"			\ -      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\ -      ".previous" +	OLDINSTR(oldinstr)						\ +	".section .altinstructions,\"a\"\n"				\ +	ALTINSTR_ENTRY(feature, 1)					\ +	".previous\n"							\ +	".section .discard,\"aw\",@progbits\n"				\ +	DISCARD_ENTRY(1)						\ +	".previous\n"							\ +	".section .altinstr_replacement, \"ax\"\n"			\ +	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\ +	".previous" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ +	OLDINSTR(oldinstr)						\ +	".section .altinstructions,\"a\"\n"				\ +	ALTINSTR_ENTRY(feature1, 1)					\ +	ALTINSTR_ENTRY(feature2, 2)					\ +	".previous\n"							\ +	".section .discard,\"aw\",@progbits\n"				\ +	DISCARD_ENTRY(1)						\ +	DISCARD_ENTRY(2)						\ +	".previous\n"							\ +	".section .altinstr_replacement, \"ax\"\n"			\ +	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\ +	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\ +	".previous"  /*   * This must be included *after* the definition of ALTERNATIVE due to @@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end)  		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)  /* + * Like alternative_call, but there are two features and respective functions. + * If CPU has feature2, function2 is used. 
+ * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \ +			   output, input...)				      \ +	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ +		"call %P[new2]", feature2)				      \ +		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \ +		[new2] "i" (newfunc2), ## input) + +/*   * use this macro(s) if you need more than one output parameter   * in alternative_io   */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed9..88093c1d44f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -306,7 +306,8 @@ struct apic {  	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);  	unsigned long (*check_apicid_present)(int apicid); -	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); +	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, +					 const struct cpumask *mask);  	void (*init_apic_ldr)(void);  	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); @@ -331,9 +332,9 @@ struct apic {  	unsigned long (*set_apic_id)(unsigned int id);  	unsigned long apic_id_mask; -	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); -	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, -					       const struct cpumask *andmask); +	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, +				      const struct cpumask *andmask, +				      unsigned int *apicid);  	/* ipi */  	void (*send_IPI_mask)(const struct cpumask *mask, int vector); @@ -537,6 +538,11 @@ static inline const struct cpumask *default_target_cpus(void)  #endif  } +static inline const struct cpumask *online_target_cpus(void) +{ +	return cpu_online_mask; +} +  DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); @@ -586,21 +592,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)  #endif -static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +			    const struct cpumask *andmask, +			    unsigned int *apicid)  { -	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +	unsigned long cpu_mask = cpumask_bits(cpumask)[0] & +				 cpumask_bits(andmask)[0] & +				 cpumask_bits(cpu_online_mask)[0] & +				 APIC_ALL_CPUS; + +	if (likely(cpu_mask)) { +		*apicid = (unsigned int)cpu_mask; +		return 0; +	} else { +		return -EINVAL; +	}  } -static inline unsigned int +extern int  default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			       const struct cpumask *andmask) +			       const struct cpumask *andmask, +			       unsigned int *apicid); + +static inline void +flat_vector_allocation_domain(int cpu, struct cpumask *retmask, +			      const struct cpumask *mask)  { -	unsigned long mask1 = cpumask_bits(cpumask)[0]; -	unsigned long mask2 = cpumask_bits(andmask)[0]; -	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; +	/* Careful. Some cpus do not strictly honor the set of cpus +	 * specified in the interrupt destination when using lowest +	 * priority interrupt delivery mode. +	 * +	 * In particular there was a hyperthreading cpu observed to +	 * deliver interrupts to the wrong hyperthread when only one +	 * hyperthread was specified in the interrupt destination. 
+	 */ +	cpumask_clear(retmask); +	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } -	return (unsigned int)(mask1 & mask2 & mask3); +static inline void +default_vector_allocation_domain(int cpu, struct cpumask *retmask, +				 const struct cpumask *mask) +{ +	cpumask_copy(retmask, cpumask_of(cpu));  }  static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca..75ce3f47d20 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -4,9 +4,7 @@  enum reboot_type {  	BOOT_TRIPLE = 't',  	BOOT_KBD = 'k', -#ifdef CONFIG_X86_32  	BOOT_BIOS = 'b', -#endif  	BOOT_ACPI = 'a',  	BOOT_EFI = 'e',  	BOOT_CF9 = 'p', diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5c5ea..d3d74698dce 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)  		virtual_dma_residue += virtual_dma_count;  		virtual_dma_count = 0;  #ifdef TRACE_FLPY_INT -		printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", +		printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",  		       virtual_dma_count, virtual_dma_residue, calls, bytes,  		       dma_wait);  		calls = 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index db7c1f2709a..2da88c0cda1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -313,8 +313,8 @@ struct kvm_pmu {  	u64 counter_bitmask[2];  	u64 global_ctrl_mask;  	u8 version; -	struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; -	struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; +	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; +	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];  	struct irq_work irq_work;  	u64 reprogram_pmi;  }; diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274c..813ed103f45 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,  extern unsigned long long native_read_tsc(void); -extern int native_rdmsr_safe_regs(u32 regs[8]); -extern int native_wrmsr_safe_regs(u32 regs[8]); +extern int rdmsr_safe_regs(u32 regs[8]); +extern int wrmsr_safe_regs(u32 regs[8]);  static __always_inline unsigned long long __native_read_tsc(void)  { @@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)  	return err;  } -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) -{ -	u32 gprs[8] = { 0 }; -	int err; - -	gprs[1] = msr; -	gprs[7] = 0x9c5a203a; - -	err = native_rdmsr_safe_regs(gprs); - -	*p = gprs[0] | ((u64)gprs[2] << 32); - -	return err; -} - -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) -{ -	u32 gprs[8] = { 0 }; - -	gprs[0] = (u32)val; -	gprs[1] = msr; -	gprs[2] = val >> 32; -	gprs[7] = 0x9c5a203a; - -	return native_wrmsr_safe_regs(gprs); -} - -static inline int rdmsr_safe_regs(u32 regs[8]) -{ -	return native_rdmsr_safe_regs(regs); -} - -static inline int wrmsr_safe_regs(u32 regs[8]) -{ -	return native_wrmsr_safe_regs(regs); -} -  #define rdtscl(low)						\  	((low) = (u32)__native_read_tsc()) @@ -237,6 +200,8 @@ do {							\  	(high) = (u32)(_l >> 32);			\  } while (0) +#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) +  #define rdtscp(low, high, aux)					\ do {              
                                              \  	unsigned long long _val = native_read_tscp(&(aux));     \ @@ -248,8 +213,7 @@ do {                                                            \  #endif	/* !CONFIG_PARAVIRT */ - -#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val),		\ +#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val),		\  					     (u32)((val) >> 32))  #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index dc580c42851..c0fa356e90d 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -44,28 +44,14 @@ struct nmiaction {  	const char		*name;  }; -#define register_nmi_handler(t, fn, fg, n)		\ +#define register_nmi_handler(t, fn, fg, n, init...)	\  ({							\ -	static struct nmiaction fn##_na = {		\ +	static struct nmiaction init fn##_na = {	\  		.handler = (fn),			\  		.name = (n),				\  		.flags = (fg),				\  	};						\ -	__register_nmi_handler((t), &fn##_na);	\ -}) - -/* - * For special handlers that register/unregister in the - * init section only.  This should be considered rare. - */ -#define register_nmi_handler_initonly(t, fn, fg, n)		\ -({							\ -	static struct nmiaction fn##_na __initdata = {		\ -		.handler = (fn),			\ -		.name = (n),				\ -		.flags = (fg),				\ -	};						\ -	__register_nmi_handler((t), &fn##_na);	\ +	__register_nmi_handler((t), &fn##_na);		\  })  int __register_nmi_handler(unsigned int, struct nmiaction *); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6cbbabf5270..0b47ddb6f00 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err)  	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);  } -static inline int paravirt_rdmsr_regs(u32 *regs) -{ -	return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); -} -  static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)  {  	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);  } -static inline int paravirt_wrmsr_regs(u32 *regs) -{ -	return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); -} -  /* These should all do BUG_ON(_err), but our headers are too tangled. 
*/  #define rdmsr(msr, val1, val2)			\  do {						\ @@ -176,9 +166,6 @@ do {						\  	_err;					\  }) -#define rdmsr_safe_regs(regs)	paravirt_rdmsr_regs(regs) -#define wrmsr_safe_regs(regs)	paravirt_wrmsr_regs(regs) -  static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)  {  	int err; @@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)  	*p = paravirt_read_msr(msr, &err);  	return err;  } -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) -{ -	u32 gprs[8] = { 0 }; -	int err; - -	gprs[1] = msr; -	gprs[7] = 0x9c5a203a; - -	err = paravirt_rdmsr_regs(gprs); - -	*p = gprs[0] | ((u64)gprs[2] << 32); - -	return err; -} - -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) -{ -	u32 gprs[8] = { 0 }; - -	gprs[0] = (u32)val; -	gprs[1] = msr; -	gprs[2] = val >> 32; -	gprs[7] = 0x9c5a203a; - -	return paravirt_wrmsr_regs(gprs); -}  static inline u64 paravirt_read_tsc(void)  { @@ -252,6 +213,8 @@ do {						\  	high = _l >> 32;			\  } while (0) +#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) +  static inline unsigned long long paravirt_rdtscp(unsigned int *aux)  {  	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8e8b9a4987e..8613cbb7ba4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -153,9 +153,7 @@ struct pv_cpu_ops {  	/* MSR, PMC and TSR operations.  	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */  	u64 (*read_msr)(unsigned int msr, int *err); -	int (*rdmsr_regs)(u32 *regs);  	int (*write_msr)(unsigned int msr, unsigned low, unsigned high); -	int (*wrmsr_regs)(u32 *regs);  	u64 (*read_tsc)(void);  	u64 (*read_pmc)(int counter); diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b3a53174602..5ad24a89b19 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -7,9 +7,13 @@  #undef DEBUG  #ifdef DEBUG -#define DBG(x...) printk(x) +#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__)  #else -#define DBG(x...) +#define DBG(fmt, ...)				
\ +do {						\ +	if (0)					\ +		printk(fmt, ##__VA_ARGS__);	\ +} while (0)  #endif  #define PCI_PROBE_BIOS		0x0001 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 588f52ea810..c78f14a0df0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -5,11 +5,10 @@   * Performance event hw details:   */ -#define X86_PMC_MAX_GENERIC				       32 -#define X86_PMC_MAX_FIXED					3 +#define INTEL_PMC_MAX_GENERIC				       32 +#define INTEL_PMC_MAX_FIXED					3 +#define INTEL_PMC_IDX_FIXED				       32 -#define X86_PMC_IDX_GENERIC				        0 -#define X86_PMC_IDX_FIXED				       32  #define X86_PMC_IDX_MAX					       64  #define MSR_ARCH_PERFMON_PERFCTR0			      0xc1 @@ -48,8 +47,7 @@  	(X86_RAW_EVENT_MASK          |  \  	 AMD64_EVENTSEL_EVENT)  #define AMD64_NUM_COUNTERS				4 -#define AMD64_NUM_COUNTERS_F15H				6 -#define AMD64_NUM_COUNTERS_MAX				AMD64_NUM_COUNTERS_F15H +#define AMD64_NUM_COUNTERS_CORE				6  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8) @@ -121,16 +119,16 @@ struct x86_pmu_capability {  /* Instr_Retired.Any: */  #define MSR_ARCH_PERFMON_FIXED_CTR0	0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS	(X86_PMC_IDX_FIXED + 0) +#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS	(INTEL_PMC_IDX_FIXED + 0)  /* CPU_CLK_Unhalted.Core: */  #define MSR_ARCH_PERFMON_FIXED_CTR1	0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES	(X86_PMC_IDX_FIXED + 1) +#define INTEL_PMC_IDX_FIXED_CPU_CYCLES	(INTEL_PMC_IDX_FIXED + 1)  /* CPU_CLK_Unhalted.Ref: */  #define MSR_ARCH_PERFMON_FIXED_CTR2	0x30b -#define X86_PMC_IDX_FIXED_REF_CYCLES	(X86_PMC_IDX_FIXED + 2) -#define X86_PMC_MSK_FIXED_REF_CYCLES	(1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) +#define INTEL_PMC_IDX_FIXED_REF_CYCLES	(INTEL_PMC_IDX_FIXED + 2) +#define INTEL_PMC_MSK_FIXED_REF_CYCLES	(1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)  /*   * We model BTS tracing as another fixed-mode PMC. @@ -139,7 +137,7 @@ struct x86_pmu_capability {   * values are used by actual fixed events and higher values are used   * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.   
*/ -#define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_FIXED_BTS				(INTEL_PMC_IDX_FIXED + 16)  /*   * IBS cpuid feature detection @@ -234,6 +232,7 @@ struct perf_guest_switch_msr {  extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);  extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); +extern void perf_check_microcode(void);  #else  static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)  { @@ -247,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)  }  static inline void perf_events_lapic_init(void)	{ } +static inline void perf_check_microcode(void) { }  #endif  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db840c..f2b489cf160 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -2,9 +2,9 @@  #define _ASM_X86_PGTABLE_2LEVEL_H  #define pte_ERROR(e) \ -	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) +	pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)  #define pgd_ERROR(e) \ -	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +	pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))  /*   * Certain architectures need to do special things when PTEs diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cb00ccc7d57..4cc9f2b7cdc 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -9,13 +9,13 @@   */  #define pte_ERROR(e)							\ -	printk("%s:%d: bad pte %p(%08lx%08lx).\n",			\ +	pr_err("%s:%d: bad pte %p(%08lx%08lx)\n",			\  	       __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)  #define pmd_ERROR(e)							\ -	printk("%s:%d: bad pmd %p(%016Lx).\n",				\ +	pr_err("%s:%d: bad pmd %p(%016Lx)\n",				\  	       __FILE__, __LINE__, &(e), pmd_val(e))  #define pgd_ERROR(e)							\ -	printk("%s:%d: bad pgd %p(%016Lx).\n",				\ +	pr_err("%s:%d: bad pgd %p(%016Lx)\n",				\  	       __FILE__, __LINE__, &(e), pgd_val(e))  /* Rules for using set_pte: the pte being assigned *must* be diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709e09a..8251be02301 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[];  extern void paging_init(void);  #define pte_ERROR(e)					\ -	printk("%s:%d: bad pte %p(%016lx).\n",		\ +	pr_err("%s:%d: bad pte %p(%016lx)\n",		\  	       __FILE__, __LINE__, &(e), pte_val(e))  #define pmd_ERROR(e)					\ -	printk("%s:%d: bad pmd %p(%016lx).\n",		\ +	pr_err("%s:%d: bad pmd %p(%016lx)\n",		\  	       __FILE__, __LINE__, &(e), pmd_val(e))  #define pud_ERROR(e)					\ -	printk("%s:%d: bad pud %p(%016lx).\n",		\ +	pr_err("%s:%d: bad pud %p(%016lx)\n",		\  	       __FILE__, __LINE__, &(e), pud_val(e))  #define pgd_ERROR(e)					\ -	printk("%s:%d: bad pgd %p(%016lx).\n",		\ +	pr_err("%s:%d: bad pgd %p(%016lx)\n",		\  	       __FILE__, __LINE__, &(e), pgd_val(e))  struct mm_struct; diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fce3f4ae5bd..fe1ec5bcd84 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -21,8 +21,9 @@ struct real_mode_header {  	u32	wakeup_header;  #endif  	/* APM/BIOS reboot */ -#ifdef CONFIG_X86_32  	u32	machine_real_restart_asm; +#ifdef CONFIG_X86_64 +	u32	
machine_real_restart_seg; #endif  }; diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f297069e8..a82c4f1b4d8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -18,8 +18,8 @@ extern struct machine_ops machine_ops;  void native_machine_crash_shutdown(struct pt_regs *regs);  void native_machine_shutdown(void); -void machine_real_restart(unsigned int type); -/* These must match dispatch_table in reboot_32.S */ +void __noreturn machine_real_restart(unsigned int type); +/* These must match dispatch in arch/x86/realmode/rm/reboot.S */  #define MRR_BIOS	0  #define MRR_APM		1 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c3..2ffa95dc233 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);  void smp_store_cpu_info(int id);  #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu) -/* We don't mark CPUs online until __cpu_up(), so we need another measure */ -static inline int num_booting_cpus(void) -{ -	return cpumask_weight(cpu_callout_mask); -}  #else /* !CONFIG_SMP */  #define wbinvd_on_cpu(cpu)     wbinvd()  static inline int wbinvd_on_all_cpus(void) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8e796fbbf9c..d8def8b3dba 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -17,6 +17,8 @@  /* Handles exceptions in both to and from, but doesn't do access_ok */  __must_check unsigned long +copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); +__must_check unsigned long  copy_user_generic_string(void *to, const void *from, unsigned len);  __must_check unsigned long  copy_user_generic_unrolled(void *to, const void *from, unsigned len); @@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)  {  	unsigned ret; -	alternative_call(copy_user_generic_unrolled, +	/* +	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string. +	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. +	 * Otherwise, use copy_user_generic_unrolled. 
+	 */ +	alternative_call_2(copy_user_generic_unrolled,  			 copy_user_generic_string,  			 X86_FEATURE_REP_GOOD, +			 copy_user_enhanced_fast_string, +			 X86_FEATURE_ERMS,  			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),  				     "=d" (len)),  			 "1" (to), "2" (from), "3" (len) 
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 1e9bed14f7a..f3971bbcd1d 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -48,7 +48,7 @@ struct arch_uprobe_task {  #endif  }; -extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm); +extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);  extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);  extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);  extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 6149b476d9d..a06983cdc12 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -140,6 +140,9 @@  #define IPI_RESET_LIMIT			1  /* after this # consecutive successes, bump up the throttle if it was lowered */  #define COMPLETE_THRESHOLD		5 +/* after this # of giveups (fall back to kernel IPI's) disable the use of +   the BAU for a period of time */ +#define GIVEUP_LIMIT			100  #define UV_LB_SUBNODEID			0x10 @@ -166,7 +169,6 @@  #define FLUSH_RETRY_TIMEOUT		2  #define FLUSH_GIVEUP			3  #define FLUSH_COMPLETE			4 -#define FLUSH_RETRY_BUSYBUG		5  /*   * tuning the action when the numalink network is extremely delayed @@ -175,7 +177,7 @@  						   microseconds */  #define CONGESTED_REPS			10	/* long delays averaged over  						   this many broadcasts */ -#define CONGESTED_PERIOD		30	/* time for the bau to be +#define DISABLED_PERIOD			10	/* time for the bau to be  						   disabled, in seconds */  /* see msg_type: */  #define MSG_NOOP			0 @@ -520,6 +522,12 @@ struct ptc_stats {  	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. 
busy */  	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */  	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */ +	unsigned long	s_overipilimit;		/* over the ipi reset limit */ +	unsigned long	s_giveuplimit;		/* disables, over giveup limit*/ +	unsigned long	s_enters;		/* entries to the driver */ +	unsigned long	s_ipifordisabled;	/* fall back to IPI; disabled */ +	unsigned long	s_plugged;		/* plugged by h/w bug*/ +	unsigned long	s_congested;		/* giveup on long wait */  	/* destination statistics */  	unsigned long	d_alltlb;		/* times all tlb's on this  						   cpu were flushed */ @@ -586,8 +594,8 @@ struct bau_control {  	int			timeout_tries;  	int			ipi_attempts;  	int			conseccompletes; -	int			baudisabled; -	int			set_bau_off; +	short			nobau; +	short			baudisabled;  	short			cpu;  	short			osnode;  	short			uvhub_cpu; @@ -596,14 +604,16 @@ struct bau_control {  	short			cpus_in_socket;  	short			cpus_in_uvhub;  	short			partition_base_pnode; -	short			using_desc; /* an index, like uvhub_cpu */ -	unsigned int		inuse_map; +	short			busy;       /* all were busy (war) */  	unsigned short		message_number;  	unsigned short		uvhub_quiesce;  	short			socket_acknowledge_count[DEST_Q_SIZE];  	cycles_t		send_message; +	cycles_t		period_end; +	cycles_t		period_time;  	spinlock_t		uvhub_lock;  	spinlock_t		queue_lock; +	spinlock_t		disable_lock;  	/* tunables */  	int			max_concurr;  	int			max_concurr_const; @@ -614,9 +624,9 @@ struct bau_control {  	int			complete_threshold;  	int			cong_response_us;  	int			cong_reps; -	int			cong_period; -	unsigned long		clocks_per_100_usec; -	cycles_t		period_time; +	cycles_t		disabled_period; +	int			period_giveups; +	int			giveup_limit;  	long			period_requests;  	struct hub_and_pnode	*thp;  }; diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 92e54abf89e..f90f0a587c6 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -9,15 +9,6 @@  #include <asm/ipi.h>  #include <linux/cpumask.h> -/* - * Need to use more than cpu 0, because we need more vectors - * when MSI-X are used. - */ -static const struct cpumask *x2apic_target_cpus(void) -{ -	return cpu_online_mask; -} -  static int x2apic_apic_id_valid(int apicid)  {  	return 1; @@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void)  	return 1;  } -/* - * For now each logical cpu is in its own vector allocation domain. - */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static void  __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)  { diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c090af10ac7..38155f66714 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -156,7 +156,6 @@ struct x86_cpuinit_ops {  /**   * struct x86_platform_ops - platform specific runtime functions   * @calibrate_tsc:		calibrate TSC - * @wallclock_init:		init the wallclock device   * @get_wallclock:		get time from HW clock like RTC etc.   
* @set_wallclock:		set time back to HW clock   * @is_untracked_pat_range	exclude from PAT logic @@ -164,10 +163,10 @@ struct x86_cpuinit_ops {   * @i8042_detect		pre-detect if i8042 controller exists   * @save_sched_clock_state:	save state for sched_clock() on suspend   * @restore_sched_clock_state:	restore state for sched_clock() on resume + * @apic_post_init:		adjust apic if needed   */  struct x86_platform_ops {  	unsigned long (*calibrate_tsc)(void); -	void (*wallclock_init)(void);  	unsigned long (*get_wallclock)(void);  	int (*set_wallclock)(unsigned long nowtime);  	void (*iommu_shutdown)(void); @@ -177,6 +176,7 @@ struct x86_platform_ops {  	int (*i8042_detect)(void);  	void (*save_sched_clock_state)(void);  	void (*restore_sched_clock_state)(void); +	void (*apic_post_init)(void);  };  struct pci_dev; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f075..931280ff829 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "SMP alternatives: " fmt +  #include <linux/module.h>  #include <linux/sched.h>  #include <linux/mutex.h> @@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)  __setup("noreplace-paravirt", setup_noreplace_paravirt);  #endif -#define DPRINTK(fmt, args...) if (debug_alternative) \ -	printk(KERN_DEBUG fmt, args) +#define DPRINTK(fmt, ...)				\ +do {							\ +	if (debug_alternative)				\ +		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\ +} while (0)  /*   * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes @@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)  	 * If this still occurs then you should see a hang  	 * or crash shortly after this line:  	 */ -	printk("lockdep: fixing up alternatives.\n"); +	pr_info("lockdep: fixing up alternatives\n");  #endif  	if (noreplace_smp || smp_alt_once || skip_smp_alternatives) @@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)  	if (smp == smp_mode) {  		/* nothing */  	} else if (smp) { -		printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); +		pr_info("switching to SMP code\n");  		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);  		list_for_each_entry(mod, &smp_alt_modules, next)  			alternatives_smp_lock(mod->locks, mod->locks_end,  					      mod->text, mod->text_end);  	} else { -		printk(KERN_INFO "SMP alternatives: switching to UP code\n"); +		pr_info("switching to UP code\n");  		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);  		list_for_each_entry(mod, &smp_alt_modules, next) @@ -546,7 +551,7 @@ void __init alternative_instructions(void)  #ifdef CONFIG_SMP  	if (smp_alt_once) {  		if (1 == num_possible_cpus()) { -			printk(KERN_INFO "SMP alternatives: switching to UP code\n"); +			pr_info("switching to UP code\n");  			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); @@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data)  	struct text_poke_param *p;  	int i; -	if (atomic_dec_and_test(&stop_machine_first)) { +	if (atomic_xchg(&stop_machine_first, 0)) {  		for (i = 0; i < tpp->nparams; i++) {  			p = &tpp->params[i];  			text_poke(p->addr, p->opcode, p->len);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591c..f29f6dd6bc0 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -2,6 +2,9 @@   * Shared support code for AMD K8 northbridges and derivates. 
* Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/types.h>  #include <linux/slab.h>  #include <linux/init.h> @@ -258,7 +261,7 @@ void amd_flush_garts(void)  	}  	spin_unlock_irqrestore(&gart_lock, flags);  	if (!flushed) -		printk("nothing to flush?\n"); +		pr_notice("nothing to flush?\n");  }  EXPORT_SYMBOL_GPL(amd_flush_garts); @@ -269,11 +272,10 @@ static __init int init_amd_nbs(void)  	err = amd_cache_northbridges();  	if (err < 0) -		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); +		pr_notice("Cannot enumerate AMD northbridges\n");  	if (amd_cache_gart() < 0) -		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " -		       "GART support disabled.\n"); +		pr_notice("Cannot initialize GART flush words, GART support disabled\n");  	return err;  } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094a..c421512ca5e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,6 +2123,25 @@ void default_init_apic_ldr(void)  	apic_write(APIC_LDR, val);  } +int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +				   const struct cpumask *andmask, +				   unsigned int *apicid) +{ +	unsigned int cpu; + +	for_each_cpu_and(cpu, cpumask, andmask) { +		if (cpumask_test_cpu(cpu, cpu_online_mask)) +			break; +	} + +	if (likely(cpu < nr_cpu_ids)) { +		*apicid = per_cpu(x86_cpu_to_apicid, cpu); +		return 0; +	} + +	return -EINVAL; +} +  /*   * Power management   */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c..00c77cf78e9 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	return 1;  } -static const struct cpumask *flat_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  /*   * Set up the logical destination ID.   
* @@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)  }  static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) +flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)  {  	unsigned long mask = cpumask_bits(cpumask)[0];  	int cpu = smp_processor_id(); @@ -186,7 +167,7 @@ static struct apic apic_flat =  {  	.irq_delivery_mode		= dest_LowestPrio,  	.irq_dest_mode			= 1, /* logical */ -	.target_cpus			= flat_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL, @@ -210,8 +191,7 @@ static struct apic apic_flat =  {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFu << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= flat_send_IPI_mask,  	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself, @@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	return 0;  } -static const struct cpumask *physflat_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)  {  	default_send_IPI_mask_sequence_phys(cpumask, vector); @@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector)  	physflat_send_IPI_mask(cpu_online_mask, vector);  } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	cpu = cpumask_first(cpumask); -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu); -	else -		return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -				const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static int physflat_probe(void)  {  	if (apic == &apic_physflat || num_possible_cpus() > 8) @@ -345,13 +282,13 @@ static struct apic apic_physflat =  {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= physflat_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= physflat_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	/* not needed, but shouldn't hurt: */  	.init_apic_ldr			= flat_init_apic_ldr, @@ -370,8 +307,7 @@ static struct apic apic_physflat =  {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFu << 24, -	.cpu_mask_to_apicid		= physflat_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= physflat_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= physflat_send_IPI_mask,  	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c0..e145f28b409 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)  	return physid_isset(bit, phys_cpu_present_map);  } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, +					  const struct cpumask *mask)  {  	if (cpu != 0)  		pr_warning("APIC: Vector allocated for non-BSP cpu\n"); -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); +	cpumask_copy(retmask, cpumask_of(cpu));  }  static u32 noop_apic_read(u32 reg) @@ -159,8 +159,7 @@ struct apic apic_noop = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= noop_send_IPI_mask,  	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c..bc552cff257 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)  	return initial_apic_id >> index_msb;  } -static const struct cpumask *numachip_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)  {  	union numachip_csr_g3_ext_irq_gen int_gen; @@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector)  	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);  } -static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	cpu = cpumask_first(cpumask); -	if (likely((unsigned)cpu < nr_cpu_ids)) -		return per_cpu(x86_cpu_to_apicid, cpu); - -	return BAD_APICID; -} - -static unsigned int -numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -				const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static int __init numachip_probe(void)  {  	return apic == &apic_numachip; @@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= numachip_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= numachip_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= flat_init_apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xffU << 24, -	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= numachip_send_IPI_mask,  	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf96..d50e3640d5a 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)  	return 1;  } -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP -	return cpu_online_mask; -#else -	return cpumask_of(0); -#endif -} -  static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)  {  	return 0; @@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)  	return 1;  } -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu = cpumask_first(cpumask); - -	if (cpu < nr_cpu_ids) -		return cpu_physical_id(cpu); -	return BAD_APICID; -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			return cpu_physical_id(cpu); -	} -	return BAD_APICID; -} -  static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)  {  	return cpuid_apic >> index_msb; @@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {  	{ } /* NULL entry stops DMI scanning */  }; -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int probe_bigsmp(void)  {  	if (def_to_bigsmp) @@ -205,13 +164,13 @@ static struct apic apic_bigsmp = {  	/* phys delivery to target CPU: */  	.irq_dest_mode			= 0, -	.target_cpus			= bigsmp_target_cpus, +	.target_cpus			= default_target_cpus,  	.disable_esr			= 1,  	.dest_logical			= 0,  	.check_apicid_used		= bigsmp_check_apicid_used,  	.check_apicid_present		= bigsmp_check_apicid_present, -	.vector_allocation_domain	= bigsmp_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= bigsmp_init_apic_ldr,  	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map, @@ -229,8 +188,7 @@ static struct apic apic_bigsmp = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= bigsmp_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= bigsmp_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= bigsmp_send_IPI_mask,  	.send_IPI_mask_allbutself	= NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c7..0874799a98c 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)  		WARN(1, "Command failed, status = %x\n", mip_status);  } -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -  static void es7000_wait_for_init_deassert(atomic_t *deassert)  {  	while (!atomic_read(deassert)) @@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)  	return 1;  } -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)  {  	unsigned int round = 0; -	int cpu, uninitialized_var(apicid); +	unsigned int cpu, uninitialized_var(apicid);  	/*  	 * The cpus in the mask must all be on the apic cluster.  	 
*/ -	for_each_cpu(cpu, cpumask) { +	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {  		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);  		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {  			WARN(1, "Not a valid mask!"); -			return BAD_APICID; +			return -EINVAL;  		} -		apicid = new_apicid; +		apicid |= new_apicid;  		round++;  	} -	return apicid; +	if (!round) +		return -EINVAL; +	*dest_id = apicid; +	return 0;  } -static unsigned int +static int  es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, -			      const struct cpumask *andmask) +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	cpumask_var_t cpumask; +	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) -		return apicid; +		return 0;  	cpumask_and(cpumask, inmask, andmask); -	cpumask_and(cpumask, cpumask, cpu_online_mask); -	apicid = es7000_cpu_mask_to_apicid(cpumask); +	es7000_cpu_mask_to_apicid(cpumask, apicid);  	free_cpumask_var(cpumask); -	return apicid; +	return 0;  }  static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) @@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = {  	.check_apicid_used		= es7000_check_apicid_used,  	.check_apicid_present		= es7000_check_apicid_present, -	.vector_allocation_domain	= es7000_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= es7000_init_apic_ldr_cluster,  	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map, @@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,  	.send_IPI_mask			= es7000_send_IPI_mask, @@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = {  	.check_apicid_used		= es7000_check_apicid_used,  	.check_apicid_present		= es7000_check_apicid_present, -	.vector_allocation_domain	= es7000_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= es7000_init_apic_ldr,  	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map, @@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,  	.send_IPI_mask			= es7000_send_IPI_mask, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5f0ff597437..406eee78468 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi  	entry = alloc_irq_pin_list(node);  	if (!entry) { -		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", -				node, apic, pin); +		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", +		       node, apic, pin);  		return -ENOMEM;  	}  	entry->apic = apic; @@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)  	ioapic_mask_entry(apic, pin);  	entry = ioapic_read_entry(apic, pin);  	if (entry.irr) -		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", +		pr_err("Unable to reset IRR for apic: %d, pin :%d\n",  		       mpc_ioapic_id(apic), pin);  } @@ -895,7 +895,7 @@ static int irq_polarity(int idx)  		}  		case 2: /* reserved */  		{ -			
printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			polarity = 1;  			break;  		} @@ -906,7 +906,7 @@ static int irq_polarity(int idx)  		}  		default: /* invalid */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			polarity = 1;  			break;  		} @@ -948,7 +948,7 @@ static int irq_trigger(int idx)  				}  				default:  				{ -					printk(KERN_WARNING "broken BIOS!!\n"); +					pr_warn("broken BIOS!!\n");  					trigger = 1;  					break;  				} @@ -962,7 +962,7 @@ static int irq_trigger(int idx)  		}  		case 2: /* reserved */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			trigger = 1;  			break;  		} @@ -973,7 +973,7 @@ static int irq_trigger(int idx)  		}  		default: /* invalid */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			trigger = 0;  			break;  		} @@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)  	 * Debugging check, we are in big trouble if this message pops up!  	 */  	if (mp_irqs[idx].dstirq != pin) -		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); +		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");  	if (test_bit(bus, mp_bus_not_pci)) {  		irq = mp_irqs[idx].srcbusirq; @@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)  	 * 0x80, because int 0x80 is hm, kind of importantish. ;)  	 */  	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; -	static int current_offset = VECTOR_OFFSET_START % 8; -	unsigned int old_vector; +	static int current_offset = VECTOR_OFFSET_START % 16;  	int cpu, err;  	cpumask_var_t tmp_mask; @@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)  	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))  		return -ENOMEM; -	old_vector = cfg->vector; -	if (old_vector) { -		cpumask_and(tmp_mask, mask, cpu_online_mask); -		cpumask_and(tmp_mask, cfg->domain, tmp_mask); -		if (!cpumask_empty(tmp_mask)) { -			free_cpumask_var(tmp_mask); -			return 0; -		} -	} -  	/* Only try and allocate irqs on cpus that are present */  	err = -ENOSPC; -	for_each_cpu_and(cpu, mask, cpu_online_mask) { -		int new_cpu; -		int vector, offset; +	cpumask_clear(cfg->old_domain); +	cpu = cpumask_first_and(mask, cpu_online_mask); +	while (cpu < nr_cpu_ids) { +		int new_cpu, vector, offset; -		apic->vector_allocation_domain(cpu, tmp_mask); +		apic->vector_allocation_domain(cpu, tmp_mask, mask); + +		if (cpumask_subset(tmp_mask, cfg->domain)) { +			err = 0; +			if (cpumask_equal(tmp_mask, cfg->domain)) +				break; +			/* +			 * New cpumask using the vector is a proper subset of +			 * the current in use mask. So cleanup the vector +			 * allocation for the members that are not used anymore. +			 */ +			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); +			cfg->move_in_progress = 1; +			cpumask_and(cfg->domain, cfg->domain, tmp_mask); +			break; +		}  		vector = current_vector;  		offset = current_offset;  next: -		vector += 8; +		vector += 16;  		if (vector >= first_system_vector) { -			/* If out of vectors on large boxen, must share them. 
*/ -			offset = (offset + 1) % 8; +			offset = (offset + 1) % 16;  			vector = FIRST_EXTERNAL_VECTOR + offset;  		} -		if (unlikely(current_vector == vector)) + +		if (unlikely(current_vector == vector)) { +			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); +			cpumask_andnot(tmp_mask, mask, cfg->old_domain); +			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);  			continue; +		}  		if (test_bit(vector, used_vectors))  			goto next; @@ -1162,7 +1171,7 @@ next:  		/* Found one! */  		current_vector = vector;  		current_offset = offset; -		if (old_vector) { +		if (cfg->vector) {  			cfg->move_in_progress = 1;  			cpumask_copy(cfg->old_domain, cfg->domain);  		} @@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,  	if (!IO_APIC_IRQ(irq))  		return; -	/* -	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy -	 * controllers like 8259. Now that IO-APIC can handle this irq, update -	 * the cfg->domain. -	 */ -	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) -		apic->vector_allocation_domain(0, cfg->domain);  	if (assign_irq_vector(irq, cfg, apic->target_cpus()))  		return; -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); +	if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), +					 &dest)) { +		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", +			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); +		__clear_irq_vector(irq, cfg); + +		return; +	}  	apic_printk(APIC_VERBOSE,KERN_DEBUG  		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,  		    cfg->vector, irq, attr->trigger, attr->polarity, dest);  	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { -		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n", +		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",  			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);  		__clear_irq_vector(irq, cfg); @@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)   * Set up the timer pin, possibly with the 8259A-master behind.   */  static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, -					 unsigned int pin, int vector) +					unsigned int pin, int vector)  {  	struct IO_APIC_route_entry entry; +	unsigned int dest;  	if (irq_remapping_enabled)  		return; @@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,  	 * We use logical delivery to get the timer IRQ  	 * to the first CPU.  	 */ +	if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), +						  apic->target_cpus(), &dest))) +		dest = BAD_APICID; +  	entry.dest_mode = apic->irq_dest_mode;  	entry.mask = 0;			/* don't mask IRQ for edge */ -	entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); +	entry.dest = dest;  	entry.delivery_mode = apic->irq_delivery_mode;  	entry.polarity = 0;  	entry.trigger = 0; @@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  		reg_03.raw = io_apic_read(ioapic_idx, 3);  	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -	printk("\n");  	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));  	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);  	printk(KERN_DEBUG ".......    
: physical APIC id: %02X\n", reg_00.bits.ID); @@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  				i,  				ir_entry->index  			); -			printk("%1d   %1d    %1d    %1d   %1d   " +			pr_cont("%1d   %1d    %1d    %1d   %1d   "  				"%1d    %1d     %X    %02X\n",  				ir_entry->format,  				ir_entry->mask, @@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  				i,  				entry.dest  			); -			printk("%1d    %1d    %1d   %1d   %1d    " +			pr_cont("%1d    %1d    %1d   %1d   %1d    "  				"%1d    %1d    %02X\n",  				entry.mask,  				entry.trigger, @@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void)  			continue;  		printk(KERN_DEBUG "IRQ%d ", irq);  		for_each_irq_pin(entry, cfg->irq_2_pin) -			printk("-> %d:%d", entry->apic, entry->pin); -		printk("\n"); +			pr_cont("-> %d:%d", entry->apic, entry->pin); +		pr_cont("\n");  	}  	printk(KERN_INFO ".................................... done.\n"); @@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base)  	printk(KERN_DEBUG);  	for (i = 0; i < 8; i++) -		printk(KERN_CONT "%08x", apic_read(base + i*0x10)); +		pr_cont("%08x", apic_read(base + i*0x10)); -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)  			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);  		}  	} -	printk("\n"); +	pr_cont("\n");  }  __apicdebuginit(void) print_local_APICs(int maxcpu) @@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)  		reg_00.raw = io_apic_read(ioapic_idx, 0);  		raw_spin_unlock_irqrestore(&ioapic_lock, flags);  		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) -			printk("could not set ID!\n"); +			pr_cont("could not set ID!\n");  		else  			apic_printk(APIC_VERBOSE, " ok.\n");  	} @@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)  	cfg->move_in_progress = 0;  } -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ -	int apic, pin; -	struct irq_pin_list *entry; -	u8 vector = cfg->vector; - -	for_each_irq_pin(entry, cfg->irq_2_pin) { -		unsigned int reg; - -		apic = entry->apic; -		pin = entry->pin; -		/* -		 * With interrupt-remapping, destination information comes -		 * from interrupt-remapping table entry. -		 */ -		if (!irq_remapped(cfg)) -			io_apic_write(apic, 0x11 + pin*2, dest); -		reg = io_apic_read(apic, 0x10 + pin*2); -		reg &= ~IO_APIC_REDIR_VECTOR_MASK; -		reg |= vector; -		io_apic_modify(apic, 0x10 + pin*2, reg); -	} -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, -			  unsigned int *dest_id) -{ -	struct irq_cfg *cfg = data->chip_data; - -	if (!cpumask_intersects(mask, cpu_online_mask)) -		return -1; - -	if (assign_irq_vector(data->irq, data->chip_data, mask)) -		return -1; - -	cpumask_copy(data->affinity, mask); - -	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); -	return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, -		    bool force) -{ -	unsigned int dest, irq = data->irq; -	unsigned long flags; -	int ret; - -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	ret = __ioapic_set_affinity(data, mask, &dest); -	if (!ret) { -		/* Only the high 8 bits are valid. 
*/ -		dest = SET_APIC_LOGICAL_ID(dest); -		__target_IO_APIC_irq(irq, dest, data->chip_data); -	} -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -	return ret; -} -  asmlinkage void smp_irq_move_cleanup_interrupt(void)  {  	unsigned vector, me; @@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq)  static inline void irq_complete_move(struct irq_cfg *cfg) { }  #endif +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ +	int apic, pin; +	struct irq_pin_list *entry; +	u8 vector = cfg->vector; + +	for_each_irq_pin(entry, cfg->irq_2_pin) { +		unsigned int reg; + +		apic = entry->apic; +		pin = entry->pin; +		/* +		 * With interrupt-remapping, destination information comes +		 * from interrupt-remapping table entry. +		 */ +		if (!irq_remapped(cfg)) +			io_apic_write(apic, 0x11 + pin*2, dest); +		reg = io_apic_read(apic, 0x10 + pin*2); +		reg &= ~IO_APIC_REDIR_VECTOR_MASK; +		reg |= vector; +		io_apic_modify(apic, 0x10 + pin*2, reg); +	} +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, +			  unsigned int *dest_id) +{ +	struct irq_cfg *cfg = data->chip_data; +	unsigned int irq = data->irq; +	int err; + +	if (!config_enabled(CONFIG_SMP)) +		return -1; + +	if (!cpumask_intersects(mask, cpu_online_mask)) +		return -EINVAL; + +	err = assign_irq_vector(irq, cfg, mask); +	if (err) +		return err; + +	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); +	if (err) { +		if (assign_irq_vector(irq, cfg, data->affinity)) +			pr_err("Failed to recover vector for irq %d\n", irq); +		return err; +	} + +	cpumask_copy(data->affinity, mask); + +	return 0; +} + +static int +ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, +		    bool force) +{ +	unsigned int dest, irq = data->irq; +	unsigned long flags; +	int ret; + +	if (!config_enabled(CONFIG_SMP)) +		return -1; + +	raw_spin_lock_irqsave(&ioapic_lock, flags); +	ret = __ioapic_set_affinity(data, mask, &dest); +	if (!ret) { +		/* Only the high 8 bits are valid. 
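ioapic_set_affinity() now returns IRQ_SET_MASK_OK_NOCOPY instead of 0, and the same substitution repeats below for the MSI, DMAR, HPET and HT chips. The value tells the generic irq core that the callback has already copied the new mask into irq_data->affinity (done in __ioapic_set_affinity() via cpumask_copy()), so the core must skip its own copy. Roughly how the core consumes it, as a simplified sketch of the kernel/irq code of this era, not part of this diff:

	static int irq_do_set_affinity_sketch(struct irq_desc *desc,
					      const struct cpumask *mask)
	{
		struct irq_data *data = &desc->irq_data;
		struct irq_chip *chip = data->chip;
		int ret;

		ret = chip->irq_set_affinity(data, mask, false);
		switch (ret) {
		case IRQ_SET_MASK_OK:
			/* chip only touched hardware: the core keeps the mask */
			cpumask_copy(data->affinity, mask);
			/* fall through */
		case IRQ_SET_MASK_OK_NOCOPY:
			/* chip already updated data->affinity: don't redo it */
			irq_set_thread_affinity(desc);
			ret = 0;
		}
		return ret;
	}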
*/ +		dest = SET_APIC_LOGICAL_ID(dest); +		__target_IO_APIC_irq(irq, dest, data->chip_data); +		ret = IRQ_SET_MASK_OK_NOCOPY; +	} +	raw_spin_unlock_irqrestore(&ioapic_lock, flags); +	return ret; +} +  static void ack_apic_edge(struct irq_data *data)  {  	irq_complete_move(data->chip_data); @@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)  	chip->irq_ack = ir_ack_apic_edge;  	chip->irq_eoi = ir_ack_apic_level; -#ifdef CONFIG_SMP  	chip->irq_set_affinity = set_remapped_irq_affinity; -#endif  }  #endif /* CONFIG_IRQ_REMAP */ @@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = {  	.irq_unmask		= unmask_ioapic_irq,  	.irq_ack		= ack_apic_edge,  	.irq_eoi		= ack_apic_level, -#ifdef CONFIG_SMP  	.irq_set_affinity	= ioapic_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,  	if (err)  		return err; -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); +	err = apic->cpu_mask_to_apicid_and(cfg->domain, +					   apic->target_cpus(), &dest); +	if (err) +		return err;  	if (irq_remapped(cfg)) {  		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,  	return err;  } -#ifdef CONFIG_SMP  static int  msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  { @@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  	__write_msi_msg(data->msi_desc, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */  /*   * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, @@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = {  	.irq_unmask		= unmask_msi_irq,  	.irq_mask		= mask_msi_irq,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= msi_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq)  }  #ifdef CONFIG_DMAR_TABLE -#ifdef CONFIG_SMP  static int  dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,  		      bool force) @@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,  	dmar_msi_write(irq, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */ -  static struct irq_chip dmar_msi_type = {  	.name			= "DMAR_MSI",  	.irq_unmask		= dmar_msi_unmask,  	.irq_mask		= dmar_msi_mask,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= dmar_msi_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq)  #ifdef CONFIG_HPET_TIMER -#ifdef CONFIG_SMP  static int hpet_msi_set_affinity(struct irq_data *data,  				 const struct cpumask *mask, bool force)  { @@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data,  	hpet_msi_write(data->handler_data, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */ -  static struct irq_chip hpet_msi_type = {  	.name = "HPET_MSI",  	.irq_unmask = hpet_msi_unmask,  	.irq_mask = hpet_msi_mask,  	.irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity = hpet_msi_set_affinity, -#endif  	.irq_retrigger = ioapic_retrigger_irq,  }; @@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)   */  #ifdef CONFIG_HT_IRQ -#ifdef CONFIG_SMP -  static void target_ht_irq(unsigned int 
irq, unsigned int dest, u8 vector)  {  	struct ht_irq_msg msg; @@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  		return -1;  	target_ht_irq(data->irq, dest, cfg->vector); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif -  static struct irq_chip ht_irq_chip = {  	.name			= "PCI-HT",  	.irq_mask		= mask_ht_irq,  	.irq_unmask		= unmask_ht_irq,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= ht_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  };  int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)  {  	struct irq_cfg *cfg; +	struct ht_irq_msg msg; +	unsigned dest;  	int err;  	if (disable_apic) @@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)  	cfg = irq_cfg(irq);  	err = assign_irq_vector(irq, cfg, apic->target_cpus()); -	if (!err) { -		struct ht_irq_msg msg; -		unsigned dest; +	if (err) +		return err; + +	err = apic->cpu_mask_to_apicid_and(cfg->domain, +					   apic->target_cpus(), &dest); +	if (err) +		return err; -		dest = apic->cpu_mask_to_apicid_and(cfg->domain, -						    apic->target_cpus()); +	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); -		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +	msg.address_lo = +		HT_IRQ_LOW_BASE | +		HT_IRQ_LOW_DEST_ID(dest) | +		HT_IRQ_LOW_VECTOR(cfg->vector) | +		((apic->irq_dest_mode == 0) ? +			HT_IRQ_LOW_DM_PHYSICAL : +			HT_IRQ_LOW_DM_LOGICAL) | +		HT_IRQ_LOW_RQEOI_EDGE | +		((apic->irq_delivery_mode != dest_LowestPrio) ? +			HT_IRQ_LOW_MT_FIXED : +			HT_IRQ_LOW_MT_ARBITRATED) | +		HT_IRQ_LOW_IRQ_MASKED; -		msg.address_lo = -			HT_IRQ_LOW_BASE | -			HT_IRQ_LOW_DEST_ID(dest) | -			HT_IRQ_LOW_VECTOR(cfg->vector) | -			((apic->irq_dest_mode == 0) ? -				HT_IRQ_LOW_DM_PHYSICAL : -				HT_IRQ_LOW_DM_LOGICAL) | -			HT_IRQ_LOW_RQEOI_EDGE | -			((apic->irq_delivery_mode != dest_LowestPrio) ? 
-				HT_IRQ_LOW_MT_FIXED : -				HT_IRQ_LOW_MT_ARBITRATED) | -			HT_IRQ_LOW_IRQ_MASKED; +	write_ht_irq_msg(irq, &msg); -		write_ht_irq_msg(irq, &msg); +	irq_set_chip_and_handler_name(irq, &ht_irq_chip, +				      handle_edge_irq, "edge"); -		irq_set_chip_and_handler_name(irq, &ht_irq_chip, -					      handle_edge_irq, "edge"); +	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); -		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); -	} -	return err; +	return 0;  }  #endif /* CONFIG_HT_IRQ */ @@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)  		/* Sanity check */  		if (reg_00.bits.ID != apic_id) { -			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); +			pr_err("IOAPIC[%d]: Unable to change apic_id!\n", +			       ioapic);  			return -1;  		}  	} diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37..d661ee95cab 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)   * We use physical apicids here, not logical, so just return the default   * physical broadcast to stop people from breaking us   */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	return 0x0F; -} - -static inline unsigned int +static int  numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			     const struct cpumask *andmask) +			     const struct cpumask *andmask, +			     unsigned int *apicid)  { -	return 0x0F; +	*apicid = 0x0F; +	return 0;  }  /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ @@ -441,20 +438,6 @@ static int probe_numaq(void)  	return found_numaq;  } -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  static void numaq_setup_portio_remap(void)  {  	int num_quads = num_online_nodes(); @@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = {  	.check_apicid_used		= numaq_check_apicid_used,  	.check_apicid_present		= numaq_check_apicid_present, -	.vector_allocation_domain	= numaq_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= numaq_init_apic_ldr,  	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map, @@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= numaq_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= numaq_cpu_mask_to_apicid_and,  	.send_IPI_mask			= numaq_send_IPI_mask, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e6..eb35ef9ee63 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)  #endif  } -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* -	 * Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. 
-	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  /* should be called last. */  static int probe_default(void)  { @@ -105,7 +90,7 @@ static struct apic apic_default = {  	.check_apicid_used		= default_check_apicid_used,  	.check_apicid_present		= default_check_apicid_present, -	.vector_allocation_domain	= default_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= default_init_apic_ldr,  	.ioapic_phys_id_map		= default_ioapic_phys_id_map, @@ -123,8 +108,7 @@ static struct apic apic_default = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= default_send_IPI_mask_logical,  	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical, @@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void)  	if (apic->setup_apic_routing)  		apic->setup_apic_routing(); + +	if (x86_platform.apic_post_init) +		x86_platform.apic_post_init();  }  void __init generic_apic_probe(void) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe98669892..1793dba7a74 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -23,11 +23,6 @@  #include <asm/ipi.h>  #include <asm/setup.h> -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ -	return hard_smp_processor_id() >> index_msb; -} -  /*   * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.   */ @@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)  		}  	} -	if (is_vsmp_box()) { -		/* need to update phys_pkg_id */ -		apic->phys_pkg_id = apicid_phys_pkg_id; -	} +	if (x86_platform.apic_post_init) +		x86_platform.apic_post_init();  }  /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c0075..77c95c0e1bf 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -26,6 +26,8 @@   *   */ +#define pr_fmt(fmt) "summit: %s: " fmt, __func__ +  #include <linux/mm.h>  #include <linux/init.h>  #include <asm/io.h> @@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)  static void summit_setup_apic_routing(void)  { -	printk("Enabling APIC mode:  Summit.  Using %d I/O APICs\n", -						nr_ioapics); +	pr_info("Enabling APIC mode:  Summit.  Using %d I/O APICs\n", +		nr_ioapics);  }  static int summit_cpu_present_to_apicid(int mps_cpu) @@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid)  	return 1;  } -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)  {  	unsigned int round = 0; -	int cpu, apicid = 0; +	unsigned int cpu, apicid = 0;  	/*  	 * The cpus in the mask must all be on the apic cluster.  	 
*/ -	for_each_cpu(cpu, cpumask) { +	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {  		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);  		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { -			printk("%s: Not a valid mask!\n", __func__); -			return BAD_APICID; +			pr_err("Not a valid mask!\n"); +			return -EINVAL;  		}  		apicid |= new_apicid;  		round++;  	} -	return apicid; +	if (!round) +		return -EINVAL; +	*dest_id = apicid; +	return 0;  } -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, -			      const struct cpumask *andmask) +static int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	cpumask_var_t cpumask; +	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) -		return apicid; +		return 0;  	cpumask_and(cpumask, inmask, andmask); -	cpumask_and(cpumask, cpumask, cpu_online_mask); -	apicid = summit_cpu_mask_to_apicid(cpumask); +	summit_cpu_mask_to_apicid(cpumask, apicid);  	free_cpumask_var(cpumask); -	return apicid; +	return 0;  }  /* @@ -320,20 +327,6 @@ static int probe_summit(void)  	return 0;  } -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  #ifdef CONFIG_X86_SUMMIT_NUMA  static struct rio_table_hdr *rio_table_hdr;  static struct scal_detail   *scal_devs[MAX_NUMNODES]; @@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		}  	}  	if (i == rio_table_hdr->num_rio_dev) { -		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); +		pr_err("Couldn't find owner Cyclone for Winnipeg!\n");  		return last_bus;  	} @@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		}  	}  	if (i == rio_table_hdr->num_scal_dev) { -		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); +		pr_err("Couldn't find owner Twister for Cyclone!\n");  		return last_bus;  	} @@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		num_buses = 9;  		break;  	default: -		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); +		pr_info("Unsupported Winnipeg type!\n");  		return last_bus;  	} @@ -411,13 +404,15 @@ static int build_detail_arrays(void)  	int i, scal_detail_size, rio_detail_size;  	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { -		printk(KERN_WARNING "%s: MAX_NUMNODES too low!  Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); +		pr_warn("MAX_NUMNODES too low!  
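Both the es7000 and summit conversions replace "pick the first CPU" with an OR over all online CPUs in the mask, guarded by an APIC_CLUSTER() check. That works because, in clustered logical mode, an APIC ID is a cluster number plus a one-bit-per-member mask, so IDs within one cluster merge into a single multi-CPU destination. A worked example with the xAPIC cluster encoding (cluster in the high nibble, matching the kernel's XAPIC_DEST_CLUSTER_MASK; the concrete values are made up):

	#define APIC_CLUSTER(apicid)	((apicid) & 0xF0)

	static int merge_example(void)
	{
		unsigned int a = 0x12;	/* cluster 1, member bit 1 */
		unsigned int b = 0x14;	/* cluster 1, member bit 2 */

		if (APIC_CLUSTER(a) != APIC_CLUSTER(b))
			return -1;	/* e.g. 0x12 vs 0x21: the new code returns -EINVAL */

		return a | b;		/* 0x16: one destination addressing both CPUs */
	}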
Defined as %d, but system has %d nodes\n", +			MAX_NUMNODES, rio_table_hdr->num_scal_dev);  		return 0;  	}  	switch (rio_table_hdr->version) {  	default: -		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); +		pr_warn("Invalid Rio Grande Table Version: %d\n", +			rio_table_hdr->version);  		return 0;  	case 2:  		scal_detail_size = 11; @@ -462,7 +457,7 @@ void setup_summit(void)  		offset = *((unsigned short *)(ptr + offset));  	}  	if (!rio_table_hdr) { -		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); +		pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");  		return;  	} @@ -509,7 +504,7 @@ static struct apic apic_summit = {  	.check_apicid_used		= summit_check_apicid_used,  	.check_apicid_present		= summit_check_apicid_present, -	.vector_allocation_domain	= summit_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= summit_init_apic_ldr,  	.ioapic_phys_id_map		= summit_ioapic_phys_id_map, @@ -527,7 +522,6 @@ static struct apic apic_summit = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= summit_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= summit_cpu_mask_to_apicid_and,  	.send_IPI_mask			= summit_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a..c88baa4ff0e 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)  }  static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)  {  	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);  } @@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector)  	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);  } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	/* -	 * We're using fixed IRQ delivery, can only return one logical APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); +	u32 dest = 0; +	u16 cluster; +	int i; -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_logical_apicid, cpu); -	else -		return BAD_APICID; -} +	for_each_cpu_and(i, cpumask, andmask) { +		if (!cpumask_test_cpu(i, cpu_online_mask)) +			continue; +		dest = per_cpu(x86_cpu_to_logical_apicid, i); +		cluster = x2apic_cluster(i); +		break; +	} -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; +	if (!dest) +		return -EINVAL; -	/* -	 * We're using fixed IRQ delivery, can only return one logical APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; +	for_each_cpu_and(i, cpumask, andmask) { +		if (!cpumask_test_cpu(i, cpu_online_mask)) +			continue; +		if (cluster != x2apic_cluster(i)) +			continue; +		dest |= per_cpu(x86_cpu_to_logical_apicid, i);  	} -	return per_cpu(x86_cpu_to_logical_apicid, cpu); +	*apicid = dest; + +	return 0;  }  static void init_x2apic_ldr(void) @@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void)  		return 0;  } +static const struct cpumask *x2apic_cluster_target_cpus(void) +{ +	return cpu_all_mask; +} + +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, +					     const struct cpumask *mask) +{ +	/* +	 * To minimize vector pressure, default case of boot, device bringup +	 * etc will use a single cpu for the interrupt destination. +	 * +	 * On explicit migration requests coming from irqbalance etc, +	 * interrupts will be routed to the x2apic cluster (cluster-id +	 * derived from the first cpu in the mask) members specified +	 * in the mask. +	 */ +	if (mask == x2apic_cluster_target_cpus()) +		cpumask_copy(retmask, cpumask_of(cpu)); +	else +		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); +} +  static struct apic apic_x2apic_cluster = {  	.name				= "cluster x2apic", @@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = {  	.irq_delivery_mode		= dest_LowestPrio,  	.irq_dest_mode			= 1, /* logical */ -	.target_cpus			= x2apic_target_cpus, +	.target_cpus			= x2apic_cluster_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= x2apic_vector_allocation_domain, +	.vector_allocation_domain	= cluster_vector_allocation_domain,  	.init_apic_ldr			= init_x2apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = {  	.set_apic_id			= x2apic_set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,  	.send_IPI_mask			= x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db27..e03a1e180e8 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)  	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);  } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); - -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu); -	else -		return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
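The new x2apic_cpu_mask_to_apicid_and() does the same merging for x2APIC, where a logical ID is 32 bits wide: bits 31..16 carry the cluster and bits 15..0 are a one-hot member mask (per the Intel SDM). The first pass picks the cluster of the first online CPU in the mask; the second pass ORs in every other online member of that same cluster and silently skips CPUs from other clusters. Worked example with invented values:

	unsigned int cpu_a = (2u << 16) | (1u << 3);	/* cluster 2, member 3 */
	unsigned int cpu_b = (2u << 16) | (1u << 5);	/* cluster 2, member 5 */
	unsigned int cpu_c = (3u << 16) | (1u << 0);	/* cluster 3: skipped, wrong cluster */
	unsigned int dest  = cpu_a | cpu_b;		/* 0x00020028: two CPUs, one destination */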
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} - -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static void init_x2apic_ldr(void)  {  } @@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= x2apic_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= x2apic_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= init_x2apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = {  	.set_apic_id			= x2apic_set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= x2apic_send_IPI_mask,  	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a440..8cfade9510a 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);  unsigned long sn_rtc_cycles_per_second;  EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static const struct cpumask *uv_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)  {  #ifdef CONFIG_SMP @@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void)  {  } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); - -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; -	else -		return BAD_APICID; -} - -static unsigned int +static int  uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			  const struct cpumask *andmask) +			  const struct cpumask *andmask, +			  unsigned int *apicid)  { -	int cpu; +	int unsigned cpu;  	/*  	 * We're using fixed IRQ delivery, can only return one phys APIC ID. 
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,  		if (cpumask_test_cpu(cpu, cpu_online_mask))  			break;  	} -	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + +	if (likely(cpu < nr_cpu_ids)) { +		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; +		return 0; +	} + +	return -EINVAL;  }  static unsigned int x2apic_get_apic_id(unsigned long x) @@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= uv_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= uv_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= uv_init_apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= uv_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,  	.send_IPI_mask			= uv_send_IPI_mask, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466..d65464e4350 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -201,6 +201,8 @@   *    http://www.microsoft.com/whdc/archive/amp_12.mspx]   */ +#define pr_fmt(fmt) "apm: " fmt +  #include <linux/module.h>  #include <linux/poll.h> @@ -485,11 +487,11 @@ static void apm_error(char *str, int err)  		if (error_table[i].key == err)  			break;  	if (i < ERROR_COUNT) -		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); +		pr_notice("%s: %s\n", str, error_table[i].msg);  	else if (err < 0) -		printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); +		pr_notice("%s: linux error code %i\n", str, err);  	else -		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", +		pr_notice("%s: unknown error code %#2.2x\n",  		       str, err);  } @@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)  			static int notified;  			if (notified++ == 0) -			    printk(KERN_ERR "apm: an event queue overflowed\n"); +				pr_err("an event queue overflowed\n");  			if (++as->event_tail >= APM_MAX_EVENTS)  				as->event_tail = 0;  		} @@ -1447,7 +1449,7 @@ static void apm_mainloop(void)  static int check_apm_user(struct apm_user *as, const char *func)  {  	if (as == NULL || as->magic != APM_BIOS_MAGIC) { -		printk(KERN_ERR "apm: %s passed bad filp\n", func); +		pr_err("%s passed bad filp\n", func);  		return 1;  	}  	return 0; @@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)  		     as1 = as1->next)  			;  		if (as1 == NULL) -			printk(KERN_ERR "apm: filp not in user list\n"); +			pr_err("filp not in user list\n");  		else  			as1->next = as->next;  	} @@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)  	struct apm_user *as;  	as = kmalloc(sizeof(*as), GFP_KERNEL); -	if (as == NULL) { -		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", -		       sizeof(*as)); +	if (as == NULL)  		return -ENOMEM; -	} +  	as->magic = APM_BIOS_MAGIC;  	as->event_tail = as->event_head = 0;  	as->suspends_pending = as->standbys_pending = 0; @@ -2313,16 +2313,16 @@ static int __init apm_init(void)  	}  	if (apm_info.disabled) { -		printk(KERN_NOTICE "apm: disabled on user request.\n"); +		pr_notice("disabled 
on user request.\n");  		return -ENODEV;  	}  	if ((num_online_cpus() > 1) && !power_off && !smp) { -		printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); +		pr_notice("disabled - APM is not SMP safe.\n");  		apm_info.disabled = 1;  		return -ENODEV;  	}  	if (!acpi_disabled) { -		printk(KERN_NOTICE "apm: overridden by ACPI.\n"); +		pr_notice("overridden by ACPI.\n");  		apm_info.disabled = 1;  		return -ENODEV;  	} @@ -2356,8 +2356,7 @@ static int __init apm_init(void)  	kapmd_task = kthread_create(apm, NULL, "kapmd");  	if (IS_ERR(kapmd_task)) { -		printk(KERN_ERR "apm: disabled - Unable to start kernel " -				"thread.\n"); +		pr_err("disabled - Unable to start kernel thread\n");  		err = PTR_ERR(kapmd_task);  		kapmd_task = NULL;  		remove_proc_entry("apm", NULL); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6ab6aa2fdfd..bac4c3804cc 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o  ifdef CONFIG_PERF_EVENTS  obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o  endif  obj-$(CONFIG_X86_MCE)			+= mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 146bb6218ee..9d92e19039f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -19,6 +19,39 @@  #include "cpu.h" +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); +	u32 gprs[8] = { 0 }; +	int err; + +	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + +	gprs[1] = msr; +	gprs[7] = 0x9c5a203a; + +	err = rdmsr_safe_regs(gprs); + +	*p = gprs[0] | ((u64)gprs[2] << 32); + +	return err; +} + +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); +	u32 gprs[8] = { 0 }; + +	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + +	gprs[0] = (u32)val; +	gprs[1] = msr; +	gprs[2] = val >> 32; +	gprs[7] = 0x9c5a203a; + +	return wrmsr_safe_regs(gprs); +} +  #ifdef CONFIG_X86_32  /*   *	B step AMD K6 before B 9730xxxx have hardware bugs that can cause @@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {  		u64 val; -		if (!rdmsrl_amd_safe(0xc0011005, &val)) { +		if (!rdmsrl_safe(0xc0011005, &val)) {  			val |= 1ULL << 54; -			wrmsrl_amd_safe(0xc0011005, val); +			wrmsrl_safe(0xc0011005, val);  			rdmsrl(0xc0011005, val);  			if (val & (1ULL << 54)) {  				set_cpu_cap(c, X86_FEATURE_TOPOEXT); @@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  		err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);  		if (err == 0) {  			mask |= (1 << 10); -			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); +			wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);  		}  	} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62b..c97bb7b5a9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,8 +55,8 @@ static void __init check_fpu(void)  	if (!boot_cpu_data.hard_math) {  #ifndef CONFIG_MATH_EMULATION -		printk(KERN_EMERG "No coprocessor 
found and no math emulation present.\n"); -		printk(KERN_EMERG "Giving up.\n"); +		pr_emerg("No coprocessor found and no math emulation present\n"); +		pr_emerg("Giving up\n");  		for (;;) ;  #endif  		return; @@ -86,7 +86,7 @@ static void __init check_fpu(void)  	boot_cpu_data.fdiv_bug = fdiv_bug;  	if (boot_cpu_data.fdiv_bug) -		printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); +		pr_warn("Hmm, FPU with FDIV bug\n");  }  static void __init check_hlt(void) @@ -94,16 +94,16 @@ static void __init check_hlt(void)  	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())  		return; -	printk(KERN_INFO "Checking 'hlt' instruction... "); +	pr_info("Checking 'hlt' instruction... ");  	if (!boot_cpu_data.hlt_works_ok) { -		printk("disabled\n"); +		pr_cont("disabled\n");  		return;  	}  	halt();  	halt();  	halt();  	halt(); -	printk(KERN_CONT "OK.\n"); +	pr_cont("OK\n");  }  /* @@ -116,7 +116,7 @@ static void __init check_popad(void)  #ifndef CONFIG_X86_POPAD_OK  	int res, inp = (int) &res; -	printk(KERN_INFO "Checking for popad bug... "); +	pr_info("Checking for popad bug... ");  	__asm__ __volatile__(  	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "  	  : "=&a" (res) @@ -127,9 +127,9 @@ static void __init check_popad(void)  	 * CPU hard. Too bad.  	 */  	if (res != 12345678) -		printk(KERN_CONT "Buggy.\n"); +		pr_cont("Buggy\n");  	else -		printk(KERN_CONT "OK.\n"); +		pr_cont("OK\n");  #endif  } @@ -161,7 +161,7 @@ void __init check_bugs(void)  {  	identify_boot_cpu();  #ifndef CONFIG_SMP -	printk(KERN_INFO "CPU: "); +	pr_info("CPU: ");  	print_cpu_info(&boot_cpu_data);  #endif  	check_config(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b9333b429b..5bbc082c47a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void)  		index_max = msr_range_array[i].max;  		for (index = index_min; index < index_max; index++) { -			if (rdmsrl_amd_safe(index, &val)) +			if (rdmsrl_safe(index, &val))  				continue;  			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);  		} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index da27c5d2168..5a5a5dc1ff1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -7,6 +7,9 @@   * Copyright 2008 Intel Corporation   * Author: Andi Kleen   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/thread_info.h>  #include <linux/capability.h>  #include <linux/miscdevice.h> @@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)  				cpu_relax();  				if (!m->finished && retries >= 4) { -					pr_err("MCE: skipping error being logged currently!\n"); +					pr_err("skipping error being logged currently!\n");  					break;  				}  			} @@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)  {  	/* mce_severity() should not hand us an ACTION_REQUIRED error */  	BUG_ON(flags & MF_ACTION_REQUIRED); -	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" -		"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); +	pr_err("Uncorrected memory error in page 0x%lx ignored\n" +	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", +	       pfn);  	return 0;  } @@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)  	b = cap & MCG_BANKCNT_MASK;  	if (!banks) -		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); +		pr_info("CPU supports %d MCE banks\n", b);  	
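The printk() to pr_*() conversions running through apm_32.c, bugs.c and mce.c lean on the pr_fmt() hook: each pr_<level>() macro expands to printk(KERN_<LEVEL> pr_fmt(fmt), ...), and printk.h only supplies a default pr_fmt if the file has not defined one, so a define placed before the includes prefixes every message in the file. That is why the literal "mce: "/"MCE: " prefixes get dropped from the format strings themselves. Expansion sketch, assuming the usual object-name-derived KBUILD_MODNAME of "mce" for this file:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* from the mce.c hunk */

	pr_err("skipping error being logged currently!\n");
	/* preprocesses to roughly:
	 *   printk(KERN_ERR "mce" ": " "skipping error being logged currently!\n");
	 */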
if (b > MAX_NR_BANKS) { -		printk(KERN_WARNING -		       "MCE: Using only %u machine check banks out of %u\n", +		pr_warn("Using only %u machine check banks out of %u\n",  			MAX_NR_BANKS, b);  		b = MAX_NR_BANKS;  	} @@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void)  static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)  {  	if (c->x86_vendor == X86_VENDOR_UNKNOWN) { -		pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); +		pr_info("unknown CPU type - not enabling MCE support\n");  		return -EOPNOTSUPP;  	} @@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void)  /* Handle unconfigured int18 (should never happen) */  static void unexpected_machine_check(struct pt_regs *regs, long error_code)  { -	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", +	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",  	       smp_processor_id());  } @@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str)  			get_option(&str, &monarch_timeout);  		}  	} else { -		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", -		       str); +		pr_info("mce argument %s ignored. Please use /sys\n", str);  		return 0;  	}  	return 1; diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index bdda2e6c673..35ffda5d072 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,  		/* Compute the maximum size with which we can make a range: */  		if (range_startk) -			max_align = ffs(range_startk) - 1; +			max_align = __ffs(range_startk);  		else -			max_align = 32; +			max_align = BITS_PER_LONG - 1; -		align = fls(range_sizek) - 1; +		align = __fls(range_sizek);  		if (align > max_align)  			align = max_align; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65..e9fe907cd24 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)  	}  	pr_debug("MTRR variable ranges %sabled:\n",  		 mtrr_state.enabled & 2 ? 
"en" : "dis"); -	if (size_or_mask & 0xffffffffUL) -		high_width = ffs(size_or_mask & 0xffffffffUL) - 1; -	else -		high_width = ffs(size_or_mask>>32) + 32 - 1; -	high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; +	high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;  	for (i = 0; i < num_var_ranges; ++i) {  		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c4706cf9c01..29557aa06dd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -35,17 +35,6 @@  #include "perf_event.h" -#if 0 -#undef wrmsrl -#define wrmsrl(msr, val) 					\ -do {								\ -	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ -			(unsigned long)(val));			\ -	native_write_msr((msr), (u32)((u64)(val)), 		\ -			(u32)((u64)(val) >> 32));		\ -} while (0) -#endif -  struct x86_pmu x86_pmu __read_mostly;  DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { @@ -74,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event)  	int idx = hwc->idx;  	s64 delta; -	if (idx == X86_PMC_IDX_FIXED_BTS) +	if (idx == INTEL_PMC_IDX_FIXED_BTS)  		return 0;  	/* @@ -86,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)  	 */  again:  	prev_raw_count = local64_read(&hwc->prev_count); -	rdmsrl(hwc->event_base, new_raw_count); +	rdpmcl(hwc->event_base_rdpmc, new_raw_count);  	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,  					new_raw_count) != prev_raw_count) @@ -189,7 +178,7 @@ static void release_pmc_hardware(void) {}  static bool check_hw_exists(void)  { -	u64 val, val_new = 0; +	u64 val, val_new = ~0;  	int i, reg, ret = 0;  	/* @@ -222,8 +211,9 @@ static bool check_hw_exists(void)  	 * that don't trap on the MSR access and always return 0s.  	 */  	val = 0xabcdUL; -	ret = checking_wrmsrl(x86_pmu_event_addr(0), val); -	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); +	reg = x86_pmu_event_addr(0); +	ret = wrmsrl_safe(reg, val); +	ret |= rdmsrl_safe(reg, &val_new);  	if (ret || val != val_new)  		goto msr_fail; @@ -240,6 +230,7 @@ bios_fail:  msr_fail:  	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); +	printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);  	return false;  } @@ -388,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event)  		int precise = 0;  		/* Support for constant skid */ -		if (x86_pmu.pebs_active) { +		if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {  			precise++;  			/* Support for IP fixup */ @@ -637,8 +628,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  	c = sched->constraints[sched->state.event];  	/* Prefer fixed purpose counters */ -	if (x86_pmu.num_counters_fixed) { -		idx = X86_PMC_IDX_FIXED; +	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { +		idx = INTEL_PMC_IDX_FIXED;  		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {  			if (!__test_and_set_bit(idx, sched->state.used))  				goto done; @@ -646,7 +637,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  	}  	/* Grab the first unused counter starting with idx */  	idx = sched->state.counter; -	for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { +	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {  		if (!__test_and_set_bit(idx, sched->state.used))  			goto done;  	} @@ -704,8 +695,8 @@ static bool perf_sched_next_event(struct perf_sched *sched)  /*   * Assign a counter for each event.   
*/ -static int perf_assign_events(struct event_constraint **constraints, int n, -			      int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, +			int wmin, int wmax, int *assign)  {  	struct perf_sched sched; @@ -824,15 +815,17 @@ static inline void x86_assign_hw_event(struct perf_event *event,  	hwc->last_cpu = smp_processor_id();  	hwc->last_tag = ++cpuc->tags[i]; -	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { +	if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {  		hwc->config_base = 0;  		hwc->event_base	= 0; -	} else if (hwc->idx >= X86_PMC_IDX_FIXED) { +	} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {  		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; -		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); +		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); +		hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;  	} else {  		hwc->config_base = x86_pmu_config_addr(hwc->idx);  		hwc->event_base  = x86_pmu_event_addr(hwc->idx); +		hwc->event_base_rdpmc = hwc->idx;  	}  } @@ -930,7 +923,7 @@ int x86_perf_event_set_period(struct perf_event *event)  	s64 period = hwc->sample_period;  	int ret = 0, idx = hwc->idx; -	if (idx == X86_PMC_IDX_FIXED_BTS) +	if (idx == INTEL_PMC_IDX_FIXED_BTS)  		return 0;  	/* @@ -1316,7 +1309,6 @@ static struct attribute_group x86_pmu_format_group = {  static int __init init_hw_perf_events(void)  {  	struct x86_pmu_quirk *quirk; -	struct event_constraint *c;  	int err;  	pr_info("Performance Events: "); @@ -1347,21 +1339,8 @@ static int __init init_hw_perf_events(void)  	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)  		quirk->func(); -	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { -		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", -		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC); -		x86_pmu.num_counters = X86_PMC_MAX_GENERIC; -	} -	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - -	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { -		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", -		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); -		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; -	} - -	x86_pmu.intel_ctrl |= -		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; +	if (!x86_pmu.intel_ctrl) +		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;  	perf_events_lapic_init();  	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); @@ -1370,22 +1349,6 @@ static int __init init_hw_perf_events(void)  		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,  				   0, x86_pmu.num_counters, 0); -	if (x86_pmu.event_constraints) { -		/* -		 * event on fixed counter2 (REF_CYCLES) only works on this -		 * counter, so do not extend mask to generic counters -		 */ -		for_each_event_constraint(c, x86_pmu.event_constraints) { -			if (c->cmask != X86_RAW_EVENT_MASK -			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { -				continue; -			} - -			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; -			c->weight += x86_pmu.num_counters; -		} -	} -  	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */  	x86_pmu_format_group.attrs = x86_pmu.format_attrs; @@ -1620,8 +1583,8 @@ static int x86_pmu_event_idx(struct perf_event *event)  	if (!x86_pmu.attr_rdpmc)  		return 0; -	if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { -		idx -= X86_PMC_IDX_FIXED; +	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { +		idx -= INTEL_PMC_IDX_FIXED;  		idx |= 
1 << 30;  	} @@ -1649,7 +1612,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,  			      struct device_attribute *attr,  			      const char *buf, size_t count)  { -	unsigned long val = simple_strtoul(buf, NULL, 0); +	unsigned long val; +	ssize_t ret; + +	ret = kstrtoul(buf, 0, &val); +	if (ret) +		return ret;  	if (!!val != !!x86_pmu.attr_rdpmc) {  		x86_pmu.attr_rdpmc = !!val; @@ -1682,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void)  		x86_pmu.flush_branch_stack();  } +void perf_check_microcode(void) +{ +	if (x86_pmu.check_microcode) +		x86_pmu.check_microcode(); +} +EXPORT_SYMBOL_GPL(perf_check_microcode); +  static struct pmu pmu = {  	.pmu_enable		= x86_pmu_enable,  	.pmu_disable		= x86_pmu_disable, -	.attr_groups	= x86_pmu_attr_groups, +	.attr_groups		= x86_pmu_attr_groups, -	.event_init	= x86_pmu_event_init, +	.event_init		= x86_pmu_event_init,  	.add			= x86_pmu_add,  	.del			= x86_pmu_del, @@ -1696,11 +1671,11 @@ static struct pmu pmu = {  	.stop			= x86_pmu_stop,  	.read			= x86_pmu_read, -	.start_txn	= x86_pmu_start_txn, -	.cancel_txn	= x86_pmu_cancel_txn, -	.commit_txn	= x86_pmu_commit_txn, +	.start_txn		= x86_pmu_start_txn, +	.cancel_txn		= x86_pmu_cancel_txn, +	.commit_txn		= x86_pmu_commit_txn, -	.event_idx	= x86_pmu_event_idx, +	.event_idx		= x86_pmu_event_idx,  	.flush_branch_stack	= x86_pmu_flush_branch_stack,  }; @@ -1863,7 +1838,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)  		else  			misc |= PERF_RECORD_MISC_GUEST_KERNEL;  	} else { -		if (user_mode(regs)) +		if (!kernel_ip(regs->ip))  			misc |= PERF_RECORD_MISC_USER;  		else  			misc |= PERF_RECORD_MISC_KERNEL; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7241e2fc3c1..a15df4be151 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -14,6 +14,18 @@  #include <linux/perf_event.h> +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) 						\ +do {									\ +	unsigned int _msr = (msr);					\ +	u64 _val = (val);						\ +	trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr),		\ +			(unsigned long long)(_val));			\ +	native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32));	\ +} while (0) +#endif +  /*   *          |   NHM/WSM    |      SNB     |   * register ------------------------------- @@ -57,7 +69,7 @@ struct amd_nb {  };  /* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS		4 +#define MAX_PEBS_EVENTS		8  /*   * A debug store configuration. 
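The set_attr_rdpmc() hunk above is the standard sysfs store conversion: simple_strtoul() cannot report malformed input, whereas kstrtoul() returns -EINVAL or -ERANGE so the error can be propagated back to the process doing the write. A minimal sketch of the same pattern for a hypothetical attribute (example_knob and set_example_knob are illustrative names, not part of this commit):

	#include <linux/device.h>
	#include <linux/kernel.h>

	static unsigned long example_knob;	/* hypothetical tunable */

	static ssize_t set_example_knob(struct device *cdev,
					struct device_attribute *attr,
					const char *buf, size_t count)
	{
		unsigned long val;
		ssize_t ret;

		/* kstrtoul() rejects garbage; simple_strtoul() would silently parse a prefix */
		ret = kstrtoul(buf, 0, &val);
		if (ret)
			return ret;	/* -EINVAL/-ERANGE reaches the caller of write() */

		example_knob = val;
		return count;		/* consumed the entire buffer */
	}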
@@ -349,6 +361,8 @@ struct x86_pmu {  	void		(*cpu_starting)(int cpu);  	void		(*cpu_dying)(int cpu);  	void		(*cpu_dead)(int cpu); + +	void		(*check_microcode)(void);  	void		(*flush_branch_stack)(void);  	/* @@ -360,12 +374,16 @@ struct x86_pmu {  	/*  	 * Intel DebugStore bits  	 */ -	int		bts, pebs; -	int		bts_active, pebs_active; +	int		bts		:1, +			bts_active	:1, +			pebs		:1, +			pebs_active	:1, +			pebs_broken	:1;  	int		pebs_record_size;  	void		(*drain_pebs)(struct pt_regs *regs);  	struct event_constraint *pebs_constraints;  	void		(*pebs_aliases)(struct perf_event *event); +	int 		max_pebs_events;  	/*  	 * Intel LBR @@ -468,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,  void x86_pmu_enable_all(int added); +int perf_assign_events(struct event_constraint **constraints, int n, +			int wmin, int wmax, int *assign);  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);  void x86_pmu_stop(struct perf_event *event, int flags); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 11a4eb9131d..4528ae7b6ec 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu)  	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; -	if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) +	if (boot_cpu_data.x86_max_cores < 2)  		return;  	nb_id = amd_get_nb_id(cpu); @@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = {  	NULL,  }; -static __initconst const struct x86_pmu amd_pmu = { -	.name			= "AMD", -	.handle_irq		= x86_pmu_handle_irq, -	.disable_all		= x86_pmu_disable_all, -	.enable_all		= x86_pmu_enable_all, -	.enable			= x86_pmu_enable_event, -	.disable		= x86_pmu_disable_event, -	.hw_config		= amd_pmu_hw_config, -	.schedule_events	= x86_schedule_events, -	.eventsel		= MSR_K7_EVNTSEL0, -	.perfctr		= MSR_K7_PERFCTR0, -	.event_map		= amd_pmu_event_map, -	.max_events		= ARRAY_SIZE(amd_perfmon_event_map), -	.num_counters		= AMD64_NUM_COUNTERS, -	.cntval_bits		= 48, -	.cntval_mask		= (1ULL << 48) - 1, -	.apic			= 1, -	/* use highest bit to detect overflow */ -	.max_period		= (1ULL << 47) - 1, -	.get_event_constraints	= amd_get_event_constraints, -	.put_event_constraints	= amd_put_event_constraints, - -	.format_attrs		= amd_format_attr, - -	.cpu_prepare		= amd_pmu_cpu_prepare, -	.cpu_starting		= amd_pmu_cpu_starting, -	.cpu_dead		= amd_pmu_cpu_dead, -}; -  /* AMD Family 15h */  #define AMD_EVENT_TYPE_MASK	0x000000F0ULL @@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev  	}  } -static __initconst const struct x86_pmu amd_pmu_f15h = { -	.name			= "AMD Family 15h", +static __initconst const struct x86_pmu amd_pmu = { +	.name			= "AMD",  	.handle_irq		= x86_pmu_handle_irq,  	.disable_all		= x86_pmu_disable_all,  	.enable_all		= x86_pmu_enable_all, @@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {  	.disable		= x86_pmu_disable_event,  	.hw_config		= amd_pmu_hw_config,  	.schedule_events	= x86_schedule_events, -	.eventsel		= MSR_F15H_PERF_CTL, -	.perfctr		= MSR_F15H_PERF_CTR, +	.eventsel		= MSR_K7_EVNTSEL0, +	.perfctr		= MSR_K7_PERFCTR0,  	.event_map		= amd_pmu_event_map,  	.max_events		= ARRAY_SIZE(amd_perfmon_event_map), -	.num_counters		= AMD64_NUM_COUNTERS_F15H, +	.num_counters		= AMD64_NUM_COUNTERS,  	.cntval_bits		= 48,  	.cntval_mask		= (1ULL << 48) - 1,  	.apic			= 1,  	/* use highest bit to detect 
overflow */  	.max_period		= (1ULL << 47) - 1, -	.get_event_constraints	= amd_get_event_constraints_f15h, -	/* northbridge counters not yet implemented: */ -#if 0 +	.get_event_constraints	= amd_get_event_constraints,  	.put_event_constraints	= amd_put_event_constraints, +	.format_attrs		= amd_format_attr, +  	.cpu_prepare		= amd_pmu_cpu_prepare, -	.cpu_dead		= amd_pmu_cpu_dead, -#endif  	.cpu_starting		= amd_pmu_cpu_starting, -	.format_attrs		= amd_format_attr, +	.cpu_dead		= amd_pmu_cpu_dead,  }; +static int setup_event_constraints(void) +{ +	if (boot_cpu_data.x86 >= 0x15) +		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; +	return 0; +} + +static int setup_perfctr_core(void) +{ +	if (!cpu_has_perfctr_core) { +		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, +		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); +		return -ENODEV; +	} + +	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, +	     KERN_ERR "hw perf events core counters need constraints handler!"); + +	/* +	 * If core performance counter extensions exist, we must use +	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also +	 * x86_pmu_addr_offset(). +	 */ +	x86_pmu.eventsel	= MSR_F15H_PERF_CTL; +	x86_pmu.perfctr		= MSR_F15H_PERF_CTR; +	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE; + +	printk(KERN_INFO "perf: AMD core performance counters detected\n"); + +	return 0; +} +  __init int amd_pmu_init(void)  {  	/* Performance-monitoring supported from K7 and later: */  	if (boot_cpu_data.x86 < 6)  		return -ENODEV; -	/* -	 * If core performance counter extensions exist, it must be -	 * family 15h, otherwise fail. See x86_pmu_addr_offset(). -	 */ -	switch (boot_cpu_data.x86) { -	case 0x15: -		if (!cpu_has_perfctr_core) -			return -ENODEV; -		x86_pmu = amd_pmu_f15h; -		break; -	default: -		if (cpu_has_perfctr_core) -			return -ENODEV; -		x86_pmu = amd_pmu; -		break; -	} +	x86_pmu = amd_pmu; + +	setup_event_constraints(); +	setup_perfctr_core();  	/* Events are common for all AMDs */  	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 187c294bc65..7a8b9d0abca 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -5,6 +5,8 @@   * among events on a single PMU.   
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/stddef.h>  #include <linux/types.h>  #include <linux/init.h> @@ -21,14 +23,14 @@   */  static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =  { -  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c, -  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0, -  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e, -  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e, -  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4, -  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5, -  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c, -  [PERF_COUNT_HW_REF_CPU_CYCLES]	= 0x0300, /* pseudo-encoding */ +	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c, +	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0, +	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e, +	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e, +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4, +	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5, +	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c, +	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */  };  static struct event_constraint intel_core_event_constraints[] __read_mostly = @@ -747,7 +749,7 @@ static void intel_pmu_disable_all(void)  	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); -	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) +	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))  		intel_pmu_disable_bts();  	intel_pmu_pebs_disable_all(); @@ -763,9 +765,9 @@ static void intel_pmu_enable_all(int added)  	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,  			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); -	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { +	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {  		struct perf_event *event = -			cpuc->events[X86_PMC_IDX_FIXED_BTS]; +			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];  		if (WARN_ON_ONCE(!event))  			return; @@ -871,7 +873,7 @@ static inline void intel_pmu_ack_status(u64 ack)  static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)  { -	int idx = hwc->idx - X86_PMC_IDX_FIXED; +	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;  	u64 ctrl_val, mask;  	mask = 0xfULL << (idx * 4); @@ -886,7 +888,7 @@ static void intel_pmu_disable_event(struct perf_event *event)  	struct hw_perf_event *hwc = &event->hw;  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { +	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {  		intel_pmu_disable_bts();  		intel_pmu_drain_bts_buffer();  		return; @@ -915,7 +917,7 @@ static void intel_pmu_disable_event(struct perf_event *event)  static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)  { -	int idx = hwc->idx - X86_PMC_IDX_FIXED; +	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;  	u64 ctrl_val, bits, mask;  	/* @@ -949,7 +951,7 @@ static void intel_pmu_enable_event(struct perf_event *event)  	struct hw_perf_event *hwc = &event->hw;  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { +	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {  		if (!__this_cpu_read(cpu_hw_events.enabled))  			return; @@ -1000,14 +1002,14 @@ static void intel_pmu_reset(void)  	local_irq_save(flags); -	printk("clearing PMU state on CPU#%d\n", smp_processor_id()); +	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());  	for (idx = 0; idx < x86_pmu.num_counters; idx++) { -		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); -		checking_wrmsrl(x86_pmu_event_addr(idx),  0ull); +		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); +		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);  	}  	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) -		
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); +		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);  	if (ds)  		ds->bts_index = ds->bts_buffer_base; @@ -1707,16 +1709,61 @@ static __init void intel_clovertown_quirk(void)  	 * But taken together it might just make sense to not enable PEBS on  	 * these chips.  	 */ -	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); +	pr_warn("PEBS disabled due to CPU errata\n");  	x86_pmu.pebs = 0;  	x86_pmu.pebs_constraints = NULL;  } +static int intel_snb_pebs_broken(int cpu) +{ +	u32 rev = UINT_MAX; /* default to broken for unknown models */ + +	switch (cpu_data(cpu).x86_model) { +	case 42: /* SNB */ +		rev = 0x28; +		break; + +	case 45: /* SNB-EP */ +		switch (cpu_data(cpu).x86_mask) { +		case 6: rev = 0x618; break; +		case 7: rev = 0x70c; break; +		} +	} + +	return (cpu_data(cpu).microcode < rev); +} + +static void intel_snb_check_microcode(void) +{ +	int pebs_broken = 0; +	int cpu; + +	get_online_cpus(); +	for_each_online_cpu(cpu) { +		if ((pebs_broken = intel_snb_pebs_broken(cpu))) +			break; +	} +	put_online_cpus(); + +	if (pebs_broken == x86_pmu.pebs_broken) +		return; + +	/* +	 * Serialized by the microcode lock. +	 */ +	if (x86_pmu.pebs_broken) { +		pr_info("PEBS enabled due to microcode update\n"); +		x86_pmu.pebs_broken = 0; +	} else { +		pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); +		x86_pmu.pebs_broken = 1; +	} +} +  static __init void intel_sandybridge_quirk(void)  { -	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); -	x86_pmu.pebs = 0; -	x86_pmu.pebs_constraints = NULL; +	x86_pmu.check_microcode = intel_snb_check_microcode; +	intel_snb_check_microcode();  }  static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { @@ -1736,8 +1783,8 @@ static __init void intel_arch_events_quirk(void)  	/* disable event that is reported as not present by cpuid */  	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {  		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; -		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", -				intel_arch_events_map[bit].name); +		pr_warn("CPUID marked event: \'%s\' unavailable\n", +			intel_arch_events_map[bit].name);  	}  } @@ -1756,7 +1803,7 @@ static __init void intel_nehalem_quirk(void)  		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;  		ebx.split.no_branch_misses_retired = 0;  		x86_pmu.events_maskl = ebx.full; -		printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); +		pr_info("CPU erratum AAJ80 worked around\n");  	} } @@ -1765,6 +1812,7 @@ __init int intel_pmu_init(void)  	union cpuid10_edx edx;  	union cpuid10_eax eax;  	union cpuid10_ebx ebx; +	struct event_constraint *c;  	unsigned int unused;  	int version; @@ -1800,6 +1848,8 @@ __init int intel_pmu_init(void)  	x86_pmu.events_maskl		= ebx.full;  	x86_pmu.events_mask_len		= eax.split.mask_length; +	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); +  	/*  	 * Quirk: v2 perfmon does not report fixed-purpose events, so  	 * assume at least 3 events: @@ -1951,5 +2001,37 @@ __init int intel_pmu_init(void)  		}  	} +	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { +		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", +		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); +		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; +	} +	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + +	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { +		WARN(1, KERN_ERR "hw perf 
events fixed %d > max(%d), clipping!", +		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); +		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; +	} + +	x86_pmu.intel_ctrl |= +		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + +	if (x86_pmu.event_constraints) { +		/* +		 * event on fixed counter2 (REF_CYCLES) only works on this +		 * counter, so do not extend mask to generic counters +		 */ +		for_each_event_constraint(c, x86_pmu.event_constraints) { +			if (c->cmask != X86_RAW_EVENT_MASK +			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { +				continue; +			} + +			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; +			c->weight += x86_pmu.num_counters; +		} +	} +  	return 0;  } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 35e2192df9f..629ae0b7ad9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -248,7 +248,7 @@ void reserve_ds_buffers(void)   */  struct event_constraint bts_constraint = -	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); +	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);  void intel_pmu_enable_bts(u64 config)  { @@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void)  		u64	to;  		u64	flags;  	}; -	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; +	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];  	struct bts_record *at, *top;  	struct perf_output_handle handle;  	struct perf_event_header header; @@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)  	 * Should not happen, we program the threshold at 1 and do not  	 * set a reset value.  	 */ -	WARN_ON_ONCE(n > 1); +	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);  	at += n - 1;  	__intel_pmu_pebs_event(event, iregs, at); @@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  	 * Should not happen, we program the threshold at 1 and do not  	 * set a reset value.  	 */ -	WARN_ON_ONCE(n > MAX_PEBS_EVENTS); +	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);  	for ( ; at < top; at++) { -		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { +		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {  			event = cpuc->events[bit];  			if (!test_bit(bit, cpuc->active_mask))  				continue; @@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  			break;  		} -		if (!event || bit >= MAX_PEBS_EVENTS) +		if (!event || bit >= x86_pmu.max_pebs_events)  			continue;  		__intel_pmu_pebs_event(event, iregs, at); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c new file mode 100644 index 00000000000..19faffc6088 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -0,0 +1,1850 @@ +#include "perf_event_intel_uncore.h" + +static struct intel_uncore_type *empty_uncore[] = { NULL, }; +static struct intel_uncore_type **msr_uncores = empty_uncore; +static struct intel_uncore_type **pci_uncores = empty_uncore; +/* pci bus to socket mapping */ +static int pcibus_to_physid[256] = { [0 ... 
255] = -1, }; + +static DEFINE_RAW_SPINLOCK(uncore_box_lock); + +/* mask of cpus that collect uncore events */ +static cpumask_t uncore_cpu_mask; + +/* constraint for the fixed counter */ +static struct event_constraint constraint_fixed = +	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); +static struct event_constraint constraint_empty = +	EVENT_CONSTRAINT(0, 0, 0); + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); + +/* Sandy Bridge-EP uncore support */ +static struct intel_uncore_type snbep_uncore_cbox; +static struct intel_uncore_type snbep_uncore_pcu; + +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	int box_ctl = uncore_pci_box_ctl(box); +	u32 config; + +	pci_read_config_dword(pdev, box_ctl, &config); +	config |= SNBEP_PMON_BOX_CTL_FRZ; +	pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	int box_ctl = uncore_pci_box_ctl(box); +	u32 config; + +	pci_read_config_dword(pdev, box_ctl, &config); +	config &= ~SNBEP_PMON_BOX_CTL_FRZ; +	pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; + +	pci_write_config_dword(pdev, hwc->config_base, hwc->config | +				SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; + +	pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; +	u64 count; + +	pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); +	pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); +	return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, +				SNBEP_PMON_BOX_CTL_INT); +} + +static void 
snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ +	u64 config; +	unsigned msr; + +	msr = uncore_msr_box_ctl(box); +	if (msr) { +		rdmsrl(msr, config); +		config |= SNBEP_PMON_BOX_CTL_FRZ; +		wrmsrl(msr, config); +		return; +	} +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ +	u64 config; +	unsigned msr; + +	msr = uncore_msr_box_ctl(box); +	if (msr) { +		rdmsrl(msr, config); +		config &= ~SNBEP_PMON_BOX_CTL_FRZ; +		wrmsrl(msr, config); +		return; +	} +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + +	if (reg1->idx != EXTRA_REG_NONE) +		wrmsrl(reg1->reg, reg1->config); + +	wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	wrmsrl(hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	u64 count; + +	rdmsrl(hwc->event_base, count); +	return count; +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ +	unsigned msr = uncore_msr_box_ctl(box); +	if (msr) +		wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct event_constraint * +snbep_uncore_get_constraint(struct intel_uncore_box *box, +			    struct perf_event *event) +{ +	struct intel_uncore_extra_reg *er; +	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; +	unsigned long flags; +	bool ok = false; + +	if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) +		return NULL; + +	er = &box->shared_regs[reg1->idx]; +	raw_spin_lock_irqsave(&er->lock, flags); +	if (!atomic_read(&er->ref) || er->config1 == reg1->config) { +		atomic_inc(&er->ref); +		er->config1 = reg1->config; +		ok = true; +	} +	raw_spin_unlock_irqrestore(&er->lock, flags); + +	if (ok) { +		if (box->phys_id >= 0) +			reg1->alloc = 1; +		return NULL; +	} +	return &constraint_empty; +} + +static void snbep_uncore_put_constraint(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct intel_uncore_extra_reg *er; +	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + +	if (box->phys_id < 0 || !reg1->alloc) +		return; + +	er = &box->shared_regs[reg1->idx]; +	atomic_dec(&er->ref); +	reg1->alloc = 0; +} + +static int snbep_uncore_hw_config(struct intel_uncore_box *box, +				  struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + +	if (box->pmu->type == &snbep_uncore_cbox) { +		reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + +			SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; +		reg1->config = event->attr.config1 & +			SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; +	} else if (box->pmu->type == &snbep_uncore_pcu) { +		reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; +		reg1->config = event->attr.config1 & +			SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; +	} else { +		return 0; +	} +	reg1->idx = 0; +	return 0; +} + +static struct attribute *snbep_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_thresh8.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	
&format_attr_thresh5.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_cbox_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_tid_en.attr, +	&format_attr_inv.attr, +	&format_attr_thresh8.attr, +	&format_attr_filter_tid.attr, +	&format_attr_filter_nid.attr, +	&format_attr_filter_state.attr, +	&format_attr_filter_opc.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_occ_sel.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_thresh5.attr, +	&format_attr_occ_invert.attr, +	&format_attr_occ_edge.attr, +	&format_attr_filter_brand0.attr, +	&format_attr_filter_brand1.attr, +	&format_attr_filter_brand2.attr, +	&format_attr_filter_brand3.attr, +	NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"), +	INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"), +	INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), +	{ /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"), +	INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), +	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x02,umask=0x08"), +	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x03,umask=0x04"), +	{ /* end: all zeroes */ }, +}; + +static struct attribute_group snbep_uncore_format_group = { +	.name = "format", +	.attrs = snbep_uncore_formats_attr, +}; + +static struct attribute_group snbep_uncore_ubox_format_group = { +	.name = "format", +	.attrs = snbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group snbep_uncore_cbox_format_group = { +	.name = "format", +	.attrs = snbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group snbep_uncore_pcu_format_group = { +	.name = "format", +	.attrs = snbep_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_ops snbep_uncore_msr_ops = { +	.init_box	= snbep_uncore_msr_init_box, +	.disable_box	= snbep_uncore_msr_disable_box, +	.enable_box	= snbep_uncore_msr_enable_box, +	.disable_event	= snbep_uncore_msr_disable_event, +	.enable_event	= snbep_uncore_msr_enable_event, +	.read_counter	= snbep_uncore_msr_read_counter, +	.get_constraint = snbep_uncore_get_constraint, +	.put_constraint = snbep_uncore_put_constraint, +	.hw_config	= snbep_uncore_hw_config, +}; + +static struct intel_uncore_ops snbep_uncore_pci_ops = { +	.init_box	= snbep_uncore_pci_init_box, +	.disable_box	= snbep_uncore_pci_disable_box, +	.enable_box	= snbep_uncore_pci_enable_box, +	.disable_event	= snbep_uncore_pci_disable_event, +	.enable_event	= snbep_uncore_pci_enable_event, +	.read_counter	= snbep_uncore_pci_read_counter, +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x01, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x02, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x04, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x05, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x07, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x13, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), +	EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), +	UNCORE_EVENT_CONSTRAINT(0x21, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x31, 0x3), +	
UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x35, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x36, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x37, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x38, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x39, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), +	EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x10, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x24, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x25, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x26, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x10, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x13, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x20, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x21, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x22, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x24, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x25, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x26, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x30, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x31, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x36, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x37, 0x3), +	EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { +	.name		= "ubox", +	.num_counters   = 2, +	.num_boxes	= 1, +	.perf_ctr_bits	= 44, +	.fixed_ctr_bits	= 48, +	.perf_ctr	= SNBEP_U_MSR_PMON_CTR0, +	.event_ctl	= SNBEP_U_MSR_PMON_CTL0, +	.event_mask	= SNBEP_U_MSR_PMON_RAW_EVENT_MASK, +	.fixed_ctr	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, +	.fixed_ctl	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, +	.ops		= &snbep_uncore_msr_ops, +	.format_group	= &snbep_uncore_ubox_format_group, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { +	.name			= "cbox", +	.num_counters		= 4, +	.num_boxes		= 8, +	.perf_ctr_bits		= 44, +	.event_ctl		= SNBEP_C0_MSR_PMON_CTL0, +	.perf_ctr		= SNBEP_C0_MSR_PMON_CTR0, +	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, +	.box_ctl		= SNBEP_C0_MSR_PMON_BOX_CTL, +	.msr_offset		= SNBEP_CBO_MSR_OFFSET, +	.num_shared_regs	= 1, +	.constraints		= snbep_uncore_cbox_constraints, +	.ops			= &snbep_uncore_msr_ops, +	.format_group		= &snbep_uncore_cbox_format_group, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { +	.name			= "pcu", +	.num_counters		= 4, +	.num_boxes		= 1, +	.perf_ctr_bits		= 48, +	.perf_ctr		= SNBEP_PCU_MSR_PMON_CTR0, +	.event_ctl		= SNBEP_PCU_MSR_PMON_CTL0, +	.event_mask		= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, +	.box_ctl		= SNBEP_PCU_MSR_PMON_BOX_CTL, +	.num_shared_regs	= 1, +	.ops			= &snbep_uncore_msr_ops, +	.format_group		= &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { +	&snbep_uncore_ubox, +	&snbep_uncore_cbox, +	&snbep_uncore_pcu, +	NULL, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT()				\ +	.perf_ctr	= SNBEP_PCI_PMON_CTR0,			\ +	.event_ctl	= SNBEP_PCI_PMON_CTL0,			\ +	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,		\ +	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,		\ +	.ops		= &snbep_uncore_pci_ops,		\ +	.format_group	= &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { +	.name		= "ha", +	
.num_counters   = 4, +	.num_boxes	= 1, +	.perf_ctr_bits	= 48, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { +	.name		= "imc", +	.num_counters   = 4, +	.num_boxes	= 4, +	.perf_ctr_bits	= 48, +	.fixed_ctr_bits	= 48, +	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, +	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, +	.event_descs	= snbep_uncore_imc_events, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { +	.name		= "qpi", +	.num_counters   = 4, +	.num_boxes	= 2, +	.perf_ctr_bits	= 48, +	.event_descs	= snbep_uncore_qpi_events, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { +	.name		= "r2pcie", +	.num_counters   = 4, +	.num_boxes	= 1, +	.perf_ctr_bits	= 44, +	.constraints	= snbep_uncore_r2pcie_constraints, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { +	.name		= "r3qpi", +	.num_counters   = 3, +	.num_boxes	= 2, +	.perf_ctr_bits	= 44, +	.constraints	= snbep_uncore_r3qpi_constraints, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { +	&snbep_uncore_ha, +	&snbep_uncore_imc, +	&snbep_uncore_qpi, +	&snbep_uncore_r2pcie, +	&snbep_uncore_r3qpi, +	NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { +	{ /* Home Agent */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), +		.driver_data = (unsigned long)&snbep_uncore_ha, +	}, +	{ /* MC Channel 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 2 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 3 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* QPI Port 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), +		.driver_data = (unsigned long)&snbep_uncore_qpi, +	}, +	{ /* QPI Port 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), +		.driver_data = (unsigned long)&snbep_uncore_qpi, +	}, +	{ /* P2PCIe */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), +		.driver_data = (unsigned long)&snbep_uncore_r2pcie, +	}, +	{ /* R3QPI Link 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), +		.driver_data = (unsigned long)&snbep_uncore_r3qpi, +	}, +	{ /* R3QPI Link 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), +		.driver_data = (unsigned long)&snbep_uncore_r3qpi, +	}, +	{ /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { +	.name		= "snbep_uncore", +	.id_table	= snbep_uncore_pci_ids, +}; + +/* + * build pci bus to socket mapping + */ +static void snbep_pci2phy_map_init(void) +{ +	struct pci_dev *ubox_dev = NULL; +	int i, bus, nodeid; +	u32 config; + +	while (1) { +		/* find the UBOX device */ +		ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, +					PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, +					ubox_dev); +		if (!ubox_dev) +			break; +		bus = ubox_dev->bus->number; +		/* get the Node ID of the local register */ +		pci_read_config_dword(ubox_dev, 0x40, &config); +		nodeid = config; +		/* get the Node ID mapping */ +		pci_read_config_dword(ubox_dev, 0x54, &config); +	
	/* +		 * each group of three bits in the Node ID mapping register +		 * maps to a particular node. +		 */ +		for (i = 0; i < 8; i++) { +			if (nodeid == ((config >> (3 * i)) & 0x7)) { +				pcibus_to_physid[bus] = i; +				break; +			} +		} +	} +} +/* end of Sandy Bridge-EP uncore support */ + + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	if (hwc->idx < UNCORE_PMC_IDX_FIXED) +		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); +	else +		wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	wrmsrl(event->hw.config_base, 0); +} + +static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	u64 count; +	rdmsrl(event->hw.event_base, count); +	return count; +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ +	if (box->pmu->pmu_idx == 0) { +		wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, +			SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); +	} +} + +static struct attribute *snb_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_cmask5.attr, +	NULL, +}; + +static struct attribute_group snb_uncore_format_group = { +	.name = "format", +	.attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { +	.init_box	= snb_uncore_msr_init_box, +	.disable_event	= snb_uncore_msr_disable_event, +	.enable_event	= snb_uncore_msr_enable_event, +	.read_counter	= snb_uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_cbox_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x80, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x83, 0x1), +	EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { +	.name		= "cbox", +	.num_counters   = 2, +	.num_boxes	= 4, +	.perf_ctr_bits	= 44, +	.fixed_ctr_bits	= 48, +	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0, +	.event_ctl	= SNB_UNC_CBO_0_PERFEVTSEL0, +	.fixed_ctr	= SNB_UNC_FIXED_CTR, +	.fixed_ctl	= SNB_UNC_FIXED_CTR_CTRL, +	.single_fixed	= 1, +	.event_mask	= SNB_UNC_RAW_EVENT_MASK, +	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET, +	.constraints	= snb_uncore_cbox_constraints, +	.ops		= &snb_uncore_msr_ops, +	.format_group	= &snb_uncore_format_group, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { +	&snb_uncore_cbox, +	NULL, +}; +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ +	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ +	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, +		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	if (hwc->idx < UNCORE_PMC_IDX_FIXED) +		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); +	else +		wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_cmask8.attr, +	NULL, +}; + +static struct attribute_group nhm_uncore_format_group = { +	.name = "format", +	.attrs = nhm_uncore_formats_attr, +}; + +static struct 
uncore_event_desc nhm_uncore_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"), +	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"), +	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"), +	{ /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { +	.disable_box	= nhm_uncore_msr_disable_box, +	.enable_box	= nhm_uncore_msr_enable_box, +	.disable_event	= snb_uncore_msr_disable_event, +	.enable_event	= nhm_uncore_msr_enable_event, +	.read_counter	= snb_uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { +	.name		= "", +	.num_counters   = 8, +	.num_boxes	= 1, +	.perf_ctr_bits	= 48, +	.fixed_ctr_bits	= 48, +	.event_ctl	= NHM_UNC_PERFEVTSEL0, +	.perf_ctr	= NHM_UNC_UNCORE_PMC0, +	.fixed_ctr	= NHM_UNC_FIXED_CTR, +	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL, +	.event_mask	= NHM_UNC_RAW_EVENT_MASK, +	.event_descs	= nhm_uncore_events, +	.ops		= &nhm_uncore_msr_ops, +	.format_group	= &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { +	&nhm_uncore, +	NULL, +}; +/* end of Nehalem uncore support */ + +static void uncore_assign_hw_event(struct intel_uncore_box *box, +				struct perf_event *event, int idx) +{ +	struct hw_perf_event *hwc = &event->hw; + +	hwc->idx = idx; +	hwc->last_tag = ++box->tags[idx]; + +	if (hwc->idx == UNCORE_PMC_IDX_FIXED) { +		hwc->event_base = uncore_fixed_ctr(box); +		hwc->config_base = uncore_fixed_ctl(box); +		return; +	} + +	hwc->config_base = uncore_event_ctl(box, hwc->idx); +	hwc->event_base  = uncore_perf_ctr(box, hwc->idx); +} + +static void uncore_perf_event_update(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	u64 prev_count, new_count, delta; +	int shift; + +	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) +		shift = 64 - uncore_fixed_ctr_bits(box); +	else +		shift = 64 - uncore_perf_ctr_bits(box); + +	/* the hrtimer might modify the previous event value */ +again: +	prev_count = local64_read(&event->hw.prev_count); +	new_count = uncore_read_counter(box, event); +	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) +		goto again; + +	delta = (new_count << shift) - (prev_count << shift); +	delta >>= shift; + +	local64_add(delta, &event->count); +} + +/* + * The overflow interrupt is unavailable for SandyBridge-EP, and broken + * for SandyBridge. So we use an hrtimer to periodically poll the counters + * to avoid overflow. 
+ */ +static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) +{ +	struct intel_uncore_box *box; +	unsigned long flags; +	int bit; + +	box = container_of(hrtimer, struct intel_uncore_box, hrtimer); +	if (!box->n_active || box->cpu != smp_processor_id()) +		return HRTIMER_NORESTART; +	/* +	 * disable local interrupts to prevent uncore_pmu_event_start/stop +	 * from interrupting the update process +	 */ +	local_irq_save(flags); + +	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) +		uncore_perf_event_update(box, box->events[bit]); + +	local_irq_restore(flags); + +	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); +	return HRTIMER_RESTART; +} + +static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +{ +	__hrtimer_start_range_ns(&box->hrtimer, +			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, +			HRTIMER_MODE_REL_PINNED, 0); +} + +static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +{ +	hrtimer_cancel(&box->hrtimer); +} + +static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) +{ +	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +	box->hrtimer.function = uncore_pmu_hrtimer; +} + +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, +					  int cpu) +{ +	struct intel_uncore_box *box; +	int i, size; + +	size = sizeof(*box) + type->num_shared_regs * +		sizeof(struct intel_uncore_extra_reg); + +	box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); +	if (!box) +		return NULL; + +	for (i = 0; i < type->num_shared_regs; i++) +		raw_spin_lock_init(&box->shared_regs[i].lock); + +	uncore_pmu_init_hrtimer(box); +	atomic_set(&box->refcnt, 1); +	box->cpu = -1; +	box->phys_id = -1; + +	return box; +} + +static struct intel_uncore_box * +uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +{ +	struct intel_uncore_box *box; + +	box = *per_cpu_ptr(pmu->box, cpu); +	if (box) +		return box; + +	raw_spin_lock(&uncore_box_lock); +	list_for_each_entry(box, &pmu->box_list, list) { +		if (box->phys_id == topology_physical_package_id(cpu)) { +			atomic_inc(&box->refcnt); +			*per_cpu_ptr(pmu->box, cpu) = box; +			break; +		} +	} +	raw_spin_unlock(&uncore_box_lock); + +	return *per_cpu_ptr(pmu->box, cpu); +} + +static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ +	return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ +	/* +	 * perf core schedules events on the basis of cpu; uncore events are +	 * collected by one of the cpus inside a physical package. 
+	 */ +	return uncore_pmu_to_box(uncore_event_to_pmu(event), +				 smp_processor_id()); +} + +static int uncore_collect_events(struct intel_uncore_box *box, +				struct perf_event *leader, bool dogrp) +{ +	struct perf_event *event; +	int n, max_count; + +	max_count = box->pmu->type->num_counters; +	if (box->pmu->type->fixed_ctl) +		max_count++; + +	if (box->n_events >= max_count) +		return -EINVAL; + +	n = box->n_events; +	box->event_list[n] = leader; +	n++; +	if (!dogrp) +		return n; + +	list_for_each_entry(event, &leader->sibling_list, group_entry) { +		if (event->state <= PERF_EVENT_STATE_OFF) +			continue; + +		if (n >= max_count) +			return -EINVAL; + +		box->event_list[n] = event; +		n++; +	} +	return n; +} + +static struct event_constraint * +uncore_get_event_constraint(struct intel_uncore_box *box, +			    struct perf_event *event) +{ +	struct intel_uncore_type *type = box->pmu->type; +	struct event_constraint *c; + +	if (type->ops->get_constraint) { +		c = type->ops->get_constraint(box, event); +		if (c) +			return c; +	} + +	if (event->hw.config == ~0ULL) +		return &constraint_fixed; + +	if (type->constraints) { +		for_each_event_constraint(c, type->constraints) { +			if ((event->hw.config & c->cmask) == c->code) +				return c; +		} +	} + +	return &type->unconstrainted; +} + +static void uncore_put_event_constraint(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	if (box->pmu->type->ops->put_constraint) +		box->pmu->type->ops->put_constraint(box, event); +} + +static int uncore_assign_events(struct intel_uncore_box *box, +				int assign[], int n) +{ +	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; +	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; +	int i, wmin, wmax, ret = 0; +	struct hw_perf_event *hwc; + +	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); + +	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { +		c = uncore_get_event_constraint(box, box->event_list[i]); +		constraints[i] = c; +		wmin = min(wmin, c->weight); +		wmax = max(wmax, c->weight); +	} + +	/* fastpath, try to reuse previous register */ +	for (i = 0; i < n; i++) { +		hwc = &box->event_list[i]->hw; +		c = constraints[i]; + +		/* never assigned */ +		if (hwc->idx == -1) +			break; + +		/* constraint still honored */ +		if (!test_bit(hwc->idx, c->idxmsk)) +			break; + +		/* not already used */ +		if (test_bit(hwc->idx, used_mask)) +			break; + +		__set_bit(hwc->idx, used_mask); +		if (assign) +			assign[i] = hwc->idx; +	} +	/* slow path */ +	if (i != n) +		ret = perf_assign_events(constraints, n, wmin, wmax, assign); + +	if (!assign || ret) { +		for (i = 0; i < n; i++) +			uncore_put_event_constraint(box, box->event_list[i]); +	} +	return ret ? 
-EINVAL : 0; +} + +static void uncore_pmu_event_start(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	int idx = event->hw.idx; + +	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) +		return; + +	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) +		return; + +	event->hw.state = 0; +	box->events[idx] = event; +	box->n_active++; +	__set_bit(idx, box->active_mask); + +	local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); +	uncore_enable_event(box, event); + +	if (box->n_active == 1) { +		uncore_enable_box(box); +		uncore_pmu_start_hrtimer(box); +	} +} + +static void uncore_pmu_event_stop(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	struct hw_perf_event *hwc = &event->hw; + +	if (__test_and_clear_bit(hwc->idx, box->active_mask)) { +		uncore_disable_event(box, event); +		box->n_active--; +		box->events[hwc->idx] = NULL; +		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); +		hwc->state |= PERF_HES_STOPPED; + +		if (box->n_active == 0) { +			uncore_disable_box(box); +			uncore_pmu_cancel_hrtimer(box); +		} +	} + +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { +		/* +		 * Drain the remaining delta count out of an event +		 * that we are disabling: +		 */ +		uncore_perf_event_update(box, event); +		hwc->state |= PERF_HES_UPTODATE; +	} +} + +static int uncore_pmu_event_add(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	struct hw_perf_event *hwc = &event->hw; +	int assign[UNCORE_PMC_IDX_MAX]; +	int i, n, ret; + +	if (!box) +		return -ENODEV; + +	ret = n = uncore_collect_events(box, event, false); +	if (ret < 0) +		return ret; + +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; +	if (!(flags & PERF_EF_START)) +		hwc->state |= PERF_HES_ARCH; + +	ret = uncore_assign_events(box, assign, n); +	if (ret) +		return ret; + +	/* save events moving to new counters */ +	for (i = 0; i < box->n_events; i++) { +		event = box->event_list[i]; +		hwc = &event->hw; + +		if (hwc->idx == assign[i] && +			hwc->last_tag == box->tags[assign[i]]) +			continue; +		/* +		 * Ensure we don't accidentally enable a stopped +		 * counter simply because we rescheduled. 
+		 */ +		if (hwc->state & PERF_HES_STOPPED) +			hwc->state |= PERF_HES_ARCH; + +		uncore_pmu_event_stop(event, PERF_EF_UPDATE); +	} + +	/* reprogram moved events into new counters */ +	for (i = 0; i < n; i++) { +		event = box->event_list[i]; +		hwc = &event->hw; + +		if (hwc->idx != assign[i] || +			hwc->last_tag != box->tags[assign[i]]) +			uncore_assign_hw_event(box, event, assign[i]); +		else if (i < box->n_events) +			continue; + +		if (hwc->state & PERF_HES_ARCH) +			continue; + +		uncore_pmu_event_start(event, 0); +	} +	box->n_events = n; + +	return 0; +} + +static void uncore_pmu_event_del(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	int i; + +	uncore_pmu_event_stop(event, PERF_EF_UPDATE); + +	for (i = 0; i < box->n_events; i++) { +		if (event == box->event_list[i]) { +			uncore_put_event_constraint(box, event); + +			while (++i < box->n_events) +				box->event_list[i - 1] = box->event_list[i]; + +			--box->n_events; +			break; +		} +	} + +	event->hw.idx = -1; +	event->hw.last_tag = ~0ULL; +} + +static void uncore_pmu_event_read(struct perf_event *event) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	uncore_perf_event_update(box, event); +} + +/* + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ +static int uncore_validate_group(struct intel_uncore_pmu *pmu, +				struct perf_event *event) +{ +	struct perf_event *leader = event->group_leader; +	struct intel_uncore_box *fake_box; +	int ret = -EINVAL, n; + +	fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); +	if (!fake_box) +		return -ENOMEM; + +	fake_box->pmu = pmu; +	/* +	 * the event is not yet connected with its +	 * siblings, therefore we must first collect +	 * existing siblings, then add the new event +	 * before we can simulate the scheduling +	 */ +	n = uncore_collect_events(fake_box, leader, true); +	if (n < 0) +		goto out; + +	fake_box->n_events = n; +	n = uncore_collect_events(fake_box, event, false); +	if (n < 0) +		goto out; + +	fake_box->n_events = n; + +	ret = uncore_assign_events(fake_box, NULL, n); +out: +	kfree(fake_box); +	return ret; +} + +int uncore_pmu_event_init(struct perf_event *event) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	struct hw_perf_event *hwc = &event->hw; +	int ret; + +	if (event->attr.type != event->pmu->type) +		return -ENOENT; + +	pmu = uncore_event_to_pmu(event); +	/* no device found for this pmu */ +	if (pmu->func_id < 0) +		return -ENOENT; + +	/* +	 * Uncore PMU does measure at all privilege levels all the time. +	 * So it doesn't make sense to specify any exclude bits. 
+	 */ +	if (event->attr.exclude_user || event->attr.exclude_kernel || +			event->attr.exclude_hv || event->attr.exclude_idle) +		return -EINVAL; + +	/* Sampling not supported yet */ +	if (hwc->sample_period) +		return -EINVAL; + +	/* +	 * Place all uncore events for a particular physical package +	 * onto a single cpu +	 */ +	if (event->cpu < 0) +		return -EINVAL; +	box = uncore_pmu_to_box(pmu, event->cpu); +	if (!box || box->cpu < 0) +		return -EINVAL; +	event->cpu = box->cpu; + +	event->hw.idx = -1; +	event->hw.last_tag = ~0ULL; +	event->hw.extra_reg.idx = EXTRA_REG_NONE; + +	if (event->attr.config == UNCORE_FIXED_EVENT) { +		/* no fixed counter */ +		if (!pmu->type->fixed_ctl) +			return -EINVAL; +		/* +		 * if there is only one fixed counter, only the first pmu +		 * can access the fixed counter +		 */ +		if (pmu->type->single_fixed && pmu->pmu_idx > 0) +			return -EINVAL; +		hwc->config = ~0ULL; +	} else { +		hwc->config = event->attr.config & pmu->type->event_mask; +		if (pmu->type->ops->hw_config) { +			ret = pmu->type->ops->hw_config(box, event); +			if (ret) +				return ret; +		} +	} + +	if (event->group_leader != event) +		ret = uncore_validate_group(pmu, event); +	else +		ret = 0; + +	return ret; +} + +static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) +{ +	int ret; + +	pmu->pmu = (struct pmu) { +		.attr_groups	= pmu->type->attr_groups, +		.task_ctx_nr	= perf_invalid_context, +		.event_init	= uncore_pmu_event_init, +		.add		= uncore_pmu_event_add, +		.del		= uncore_pmu_event_del, +		.start		= uncore_pmu_event_start, +		.stop		= uncore_pmu_event_stop, +		.read		= uncore_pmu_event_read, +	}; + +	if (pmu->type->num_boxes == 1) { +		if (strlen(pmu->type->name) > 0) +			sprintf(pmu->name, "uncore_%s", pmu->type->name); +		else +			sprintf(pmu->name, "uncore"); +	} else { +		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, +			pmu->pmu_idx); +	} + +	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); +	return ret; +} + +static void __init uncore_type_exit(struct intel_uncore_type *type) +{ +	int i; + +	for (i = 0; i < type->num_boxes; i++) +		free_percpu(type->pmus[i].box); +	kfree(type->pmus); +	type->pmus = NULL; +	kfree(type->attr_groups[1]); +	type->attr_groups[1] = NULL; +} + +static void uncore_types_exit(struct intel_uncore_type **types) +{ +	int i; +	for (i = 0; types[i]; i++) +		uncore_type_exit(types[i]); +} + +static int __init uncore_type_init(struct intel_uncore_type *type) +{ +	struct intel_uncore_pmu *pmus; +	struct attribute_group *events_group; +	struct attribute **attrs; +	int i, j; + +	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); +	if (!pmus) +		return -ENOMEM; + +	type->unconstrainted = (struct event_constraint) +		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, +				0, type->num_counters, 0); + +	for (i = 0; i < type->num_boxes; i++) { +		pmus[i].func_id = -1; +		pmus[i].pmu_idx = i; +		pmus[i].type = type; +		INIT_LIST_HEAD(&pmus[i].box_list); +		pmus[i].box = alloc_percpu(struct intel_uncore_box *); +		if (!pmus[i].box) +			goto fail; +	} + +	if (type->event_descs) { +		i = 0; +		while (type->event_descs[i].attr.attr.name) +			i++; + +		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + +					sizeof(*events_group), GFP_KERNEL); +		if (!events_group) +			goto fail; + +		attrs = (struct attribute **)(events_group + 1); +		events_group->name = "events"; +		events_group->attrs = attrs; + +		for (j = 0; j < i; j++) +			attrs[j] = &type->event_descs[j].attr.attr; + +		type->attr_groups[1] = events_group; +	} + +	
type->pmus = pmus; +	return 0; +fail: +	uncore_type_exit(type); +	return -ENOMEM; +} + +static int __init uncore_types_init(struct intel_uncore_type **types) +{ +	int i, ret; + +	for (i = 0; types[i]; i++) { +		ret = uncore_type_init(types[i]); +		if (ret) +			goto fail; +	} +	return 0; +fail: +	while (--i >= 0) +		uncore_type_exit(types[i]); +	return ret; +} + +static struct pci_driver *uncore_pci_driver; +static bool pcidrv_registered; + +/* + * add a pci uncore device + */ +static int __devinit uncore_pci_add(struct intel_uncore_type *type, +				    struct pci_dev *pdev) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, phys_id; + +	phys_id = pcibus_to_physid[pdev->bus->number]; +	if (phys_id < 0) +		return -ENODEV; + +	box = uncore_alloc_box(type, 0); +	if (!box) +		return -ENOMEM; + +	/* +	 * for performance monitoring unit with multiple boxes, +	 * each box has a different function id. +	 */ +	for (i = 0; i < type->num_boxes; i++) { +		pmu = &type->pmus[i]; +		if (pmu->func_id == pdev->devfn) +			break; +		if (pmu->func_id < 0) { +			pmu->func_id = pdev->devfn; +			break; +		} +		pmu = NULL; +	} + +	if (!pmu) { +		kfree(box); +		return -EINVAL; +	} + +	box->phys_id = phys_id; +	box->pci_dev = pdev; +	box->pmu = pmu; +	uncore_box_init(box); +	pci_set_drvdata(pdev, box); + +	raw_spin_lock(&uncore_box_lock); +	list_add_tail(&box->list, &pmu->box_list); +	raw_spin_unlock(&uncore_box_lock); + +	return 0; +} + +static void uncore_pci_remove(struct pci_dev *pdev) +{ +	struct intel_uncore_box *box = pci_get_drvdata(pdev); +	struct intel_uncore_pmu *pmu = box->pmu; +	int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + +	if (WARN_ON_ONCE(phys_id != box->phys_id)) +		return; + +	raw_spin_lock(&uncore_box_lock); +	list_del(&box->list); +	raw_spin_unlock(&uncore_box_lock); + +	for_each_possible_cpu(cpu) { +		if (*per_cpu_ptr(pmu->box, cpu) == box) { +			*per_cpu_ptr(pmu->box, cpu) = NULL; +			atomic_dec(&box->refcnt); +		} +	} + +	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); +	kfree(box); +} + +static int __devinit uncore_pci_probe(struct pci_dev *pdev, +				const struct pci_device_id *id) +{ +	struct intel_uncore_type *type; + +	type = (struct intel_uncore_type *)id->driver_data; +	return uncore_pci_add(type, pdev); +} + +static int __init uncore_pci_init(void) +{ +	int ret; + +	switch (boot_cpu_data.x86_model) { +	case 45: /* Sandy Bridge-EP */ +		pci_uncores = snbep_pci_uncores; +		uncore_pci_driver = &snbep_uncore_pci_driver; +		snbep_pci2phy_map_init(); +		break; +	default: +		return 0; +	} + +	ret = uncore_types_init(pci_uncores); +	if (ret) +		return ret; + +	uncore_pci_driver->probe = uncore_pci_probe; +	uncore_pci_driver->remove = uncore_pci_remove; + +	ret = pci_register_driver(uncore_pci_driver); +	if (ret == 0) +		pcidrv_registered = true; +	else +		uncore_types_exit(pci_uncores); + +	return ret; +} + +static void __init uncore_pci_exit(void) +{ +	if (pcidrv_registered) { +		pcidrv_registered = false; +		pci_unregister_driver(uncore_pci_driver); +		uncore_types_exit(pci_uncores); +	} +} + +static void __cpuinit uncore_cpu_dying(int cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			box = *per_cpu_ptr(pmu->box, cpu); +			*per_cpu_ptr(pmu->box, cpu) = NULL; +			if (box && atomic_dec_and_test(&box->refcnt)) +				kfree(box); +		} +	} +} + +static int 
__cpuinit uncore_cpu_starting(int cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box, *exist; +	int i, j, k, phys_id; + +	phys_id = topology_physical_package_id(cpu); + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			box = *per_cpu_ptr(pmu->box, cpu); +			/* called by uncore_cpu_init? */ +			if (box && box->phys_id >= 0) { +				uncore_box_init(box); +				continue; +			} + +			for_each_online_cpu(k) { +				exist = *per_cpu_ptr(pmu->box, k); +				if (exist && exist->phys_id == phys_id) { +					atomic_inc(&exist->refcnt); +					*per_cpu_ptr(pmu->box, cpu) = exist; +					kfree(box); +					box = NULL; +					break; +				} +			} + +			if (box) { +				box->phys_id = phys_id; +				uncore_box_init(box); +			} +		} +	} +	return 0; +} + +static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			if (pmu->func_id < 0) +				pmu->func_id = j; + +			box = uncore_alloc_box(type, cpu); +			if (!box) +				return -ENOMEM; + +			box->pmu = pmu; +			box->phys_id = phys_id; +			*per_cpu_ptr(pmu->box, cpu) = box; +		} +	} +	return 0; +} + +static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, +					    int old_cpu, int new_cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; uncores[i]; i++) { +		type = uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			if (old_cpu < 0) +				box = uncore_pmu_to_box(pmu, new_cpu); +			else +				box = uncore_pmu_to_box(pmu, old_cpu); +			if (!box) +				continue; + +			if (old_cpu < 0) { +				WARN_ON_ONCE(box->cpu != -1); +				box->cpu = new_cpu; +				continue; +			} + +			WARN_ON_ONCE(box->cpu != old_cpu); +			if (new_cpu >= 0) { +				uncore_pmu_cancel_hrtimer(box); +				perf_pmu_migrate_context(&pmu->pmu, +						old_cpu, new_cpu); +				box->cpu = new_cpu; +			} else { +				box->cpu = -1; +			} +		} +	} +} + +static void __cpuinit uncore_event_exit_cpu(int cpu) +{ +	int i, phys_id, target; + +	/* if exiting cpu is used for collecting uncore events */ +	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) +		return; + +	/* find a new cpu to collect uncore events */ +	phys_id = topology_physical_package_id(cpu); +	target = -1; +	for_each_online_cpu(i) { +		if (i == cpu) +			continue; +		if (phys_id == topology_physical_package_id(i)) { +			target = i; +			break; +		} +	} + +	/* migrate uncore events to the new cpu */ +	if (target >= 0) +		cpumask_set_cpu(target, &uncore_cpu_mask); + +	uncore_change_context(msr_uncores, cpu, target); +	uncore_change_context(pci_uncores, cpu, target); +} + +static void __cpuinit uncore_event_init_cpu(int cpu) +{ +	int i, phys_id; + +	phys_id = topology_physical_package_id(cpu); +	for_each_cpu(i, &uncore_cpu_mask) { +		if (phys_id == topology_physical_package_id(i)) +			return; +	} + +	cpumask_set_cpu(cpu, &uncore_cpu_mask); + +	uncore_change_context(msr_uncores, -1, cpu); +	uncore_change_context(pci_uncores, -1, cpu); +} + +static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, +					 unsigned long action, void *hcpu) +{ +	unsigned int cpu = (long)hcpu; + +	/* allocate/free data structure for uncore box */ +	
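/* +	 * CPU_TASKS_FROZEN is masked off so that the suspend/resume +	 * variants of these notifications (e.g. CPU_UP_PREPARE_FROZEN) +	 * take the same paths as ordinary cpu hotplug events. +	 */ +	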
switch (action & ~CPU_TASKS_FROZEN) { +	case CPU_UP_PREPARE: +		uncore_cpu_prepare(cpu, -1); +		break; +	case CPU_STARTING: +		uncore_cpu_starting(cpu); +		break; +	case CPU_UP_CANCELED: +	case CPU_DYING: +		uncore_cpu_dying(cpu); +		break; +	default: +		break; +	} + +	/* select the cpu that collects uncore events */ +	switch (action & ~CPU_TASKS_FROZEN) { +	case CPU_DOWN_FAILED: +	case CPU_STARTING: +		uncore_event_init_cpu(cpu); +		break; +	case CPU_DOWN_PREPARE: +		uncore_event_exit_cpu(cpu); +		break; +	default: +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block uncore_cpu_nb __cpuinitdata = { +	.notifier_call = uncore_cpu_notifier, +	/* +	 * to migrate uncore events, our notifier should be executed +	 * before perf core's notifier. +	 */ +	.priority = CPU_PRI_PERF + 1, +}; + +static void __init uncore_cpu_setup(void *dummy) +{ +	uncore_cpu_starting(smp_processor_id()); +} + +static int __init uncore_cpu_init(void) +{ +	int ret, cpu, max_cores; + +	max_cores = boot_cpu_data.x86_max_cores; +	switch (boot_cpu_data.x86_model) { +	case 26: /* Nehalem */ +	case 30: +	case 37: /* Westmere */ +	case 44: +		msr_uncores = nhm_msr_uncores; +		break; +	case 42: /* Sandy Bridge */ +		if (snb_uncore_cbox.num_boxes > max_cores) +			snb_uncore_cbox.num_boxes = max_cores; +		msr_uncores = snb_msr_uncores; +		break; +	case 45: /* Sandy Bridge-EP */ +		if (snbep_uncore_cbox.num_boxes > max_cores) +			snbep_uncore_cbox.num_boxes = max_cores; +		msr_uncores = snbep_msr_uncores; +		break; +	default: +		return 0; +	} + +	ret = uncore_types_init(msr_uncores); +	if (ret) +		return ret; + +	get_online_cpus(); + +	for_each_online_cpu(cpu) { +		int i, phys_id = topology_physical_package_id(cpu); + +		for_each_cpu(i, &uncore_cpu_mask) { +			if (phys_id == topology_physical_package_id(i)) { +				phys_id = -1; +				break; +			} +		} +		if (phys_id < 0) +			continue; + +		uncore_cpu_prepare(cpu, phys_id); +		uncore_event_init_cpu(cpu); +	} +	on_each_cpu(uncore_cpu_setup, NULL, 1); + +	register_cpu_notifier(&uncore_cpu_nb); + +	put_online_cpus(); + +	return 0; +} + +static int __init uncore_pmus_register(void) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_type *type; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			uncore_pmu_register(pmu); +		} +	} + +	for (i = 0; pci_uncores[i]; i++) { +		type = pci_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			uncore_pmu_register(pmu); +		} +	} + +	return 0; +} + +static int __init intel_uncore_init(void) +{ +	int ret; + +	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) +		return -ENODEV; + +	ret = uncore_pci_init(); +	if (ret) +		goto fail; +	ret = uncore_cpu_init(); +	if (ret) { +		uncore_pci_exit(); +		goto fail; +	} + +	uncore_pmus_register(); +	return 0; +fail: +	return ret; +} +device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h new file mode 100644 index 00000000000..b13e9ea81de --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -0,0 +1,424 @@ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/perf_event.h> +#include "perf_event.h" + +#define UNCORE_PMU_NAME_LEN		32 +#define UNCORE_BOX_HASH_SIZE		8 + +#define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC) + +#define UNCORE_FIXED_EVENT		0xff +#define UNCORE_PMC_IDX_MAX_GENERIC	8 +#define 
UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC +#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1) + +#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) + +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff +#define SNB_UNC_CTL_UMASK_MASK			0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET			(1 << 18) +#define SNB_UNC_CTL_EN				(1 << 22) +#define SNB_UNC_CTL_INVERT			(1 << 23) +#define SNB_UNC_CTL_CMASK_MASK			0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK			0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN		(1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \ +						 SNB_UNC_CTL_UMASK_MASK | \ +						 SNB_UNC_CTL_EDGE_DET | \ +						 SNB_UNC_CTL_INVERT | \ +						 SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \ +						 SNB_UNC_CTL_UMASK_MASK | \ +						 SNB_UNC_CTL_EDGE_DET | \ +						 SNB_UNC_CTL_INVERT | \ +						 NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL                 0x391 +#define SNB_UNC_FIXED_CTR_CTRL                  0x394 +#define SNB_UNC_FIXED_CTR                       0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700 +#define SNB_UNC_CBO_0_PER_CTR0                  0x706 +#define SNB_UNC_CBO_MSR_OFFSET                  0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL                 0x391 +#define NHM_UNC_FIXED_CTR                       0x394 +#define NHM_UNC_FIXED_CTR_CTRL                  0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0                     0x3c0 +#define NHM_UNC_UNCORE_PMC0                     0x3b0 + +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS	(1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ		(1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN	(1 << 16) +#define SNBEP_PMON_BOX_CTL_INT		(SNBEP_PMON_BOX_CTL_RST_CTRL | \ +					 SNBEP_PMON_BOX_CTL_RST_CTRS | \ +					 SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK	0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK	0x0000ff00 +#define SNBEP_PMON_CTL_RST		(1 << 17) +#define SNBEP_PMON_CTL_EDGE_DET		(1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)	/* only for QPI */ +#define SNBEP_PMON_CTL_EN		(1 << 22) +#define SNBEP_PMON_CTL_INVERT		(1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK	0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK	(SNBEP_PMON_CTL_EV_SEL_MASK | \ +					 SNBEP_PMON_CTL_UMASK_MASK | \ +					 SNBEP_PMON_CTL_EDGE_DET | \ +					 SNBEP_PMON_CTL_INVERT | \ +					 SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK		0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK		\ +				(SNBEP_PMON_CTL_EV_SEL_MASK | \ +				 SNBEP_PMON_CTL_UMASK_MASK | \ +				 SNBEP_PMON_CTL_EDGE_DET | \ +				 SNBEP_PMON_CTL_INVERT | \ +				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +#define SNBEP_CBO_PMON_CTL_TID_EN		(1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK	(SNBEP_PMON_RAW_EVENT_MASK | \ +						 SNBEP_CBO_PMON_CTL_TID_EN) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK	0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK	
0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT	(1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET	(1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\ +				(SNBEP_PMON_CTL_EV_SEL_MASK | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ +				 SNBEP_PMON_CTL_EDGE_DET | \ +				 SNBEP_PMON_CTL_INVERT | \ +				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL			0xf4 +#define SNBEP_PCI_PMON_CTL0			0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0			0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0	0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1	0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH	0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL		0xf0 +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR		0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0		0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1		0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0		0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1		0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0			0xc16 +#define SNBEP_U_MSR_PMON_CTL0			0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL		0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR		0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0			0xd16 +#define SNBEP_C0_MSR_PMON_CTL0			0xd10 +#define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK	0xfffffc1f +#define SNBEP_CBO_MSR_OFFSET			0x20 + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0			0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0			0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_CTL		0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK	0xffffffff +#define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd + +struct intel_uncore_ops; +struct intel_uncore_pmu; +struct intel_uncore_box; +struct uncore_event_desc; + +struct intel_uncore_type { +	const char *name; +	int num_counters; +	int num_boxes; +	int perf_ctr_bits; +	int fixed_ctr_bits; +	unsigned perf_ctr; +	unsigned event_ctl; +	unsigned event_mask; +	unsigned fixed_ctr; +	unsigned fixed_ctl; +	unsigned box_ctl; +	unsigned msr_offset; +	unsigned num_shared_regs:8; +	unsigned single_fixed:1; +	struct event_constraint unconstrainted; +	struct event_constraint *constraints; +	struct intel_uncore_pmu *pmus; +	struct intel_uncore_ops *ops; +	struct uncore_event_desc *event_descs; +	const struct attribute_group *attr_groups[3]; +}; + +#define format_group attr_groups[0] + +struct intel_uncore_ops { +	void (*init_box)(struct intel_uncore_box *); +	void (*disable_box)(struct intel_uncore_box *); +	void (*enable_box)(struct intel_uncore_box *); +	void (*disable_event)(struct intel_uncore_box *, struct perf_event *); +	void (*enable_event)(struct intel_uncore_box *, struct perf_event *); +	u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); +	int (*hw_config)(struct intel_uncore_box *, struct perf_event *); +	struct event_constraint *(*get_constraint)(struct intel_uncore_box *, +						   struct perf_event *); +	void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); +}; + +struct intel_uncore_pmu { +	struct pmu pmu; +	char name[UNCORE_PMU_NAME_LEN]; +	int pmu_idx; +	int func_id; +	struct intel_uncore_type *type; +	struct intel_uncore_box ** __percpu box; +	struct 
list_head box_list; +}; + +struct intel_uncore_extra_reg { +	raw_spinlock_t lock; +	u64 config1; +	atomic_t ref; +}; + +struct intel_uncore_box { +	int phys_id; +	int n_active;	/* number of active events */ +	int n_events; +	int cpu;	/* cpu to collect events */ +	unsigned long flags; +	atomic_t refcnt; +	struct perf_event *events[UNCORE_PMC_IDX_MAX]; +	struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; +	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; +	u64 tags[UNCORE_PMC_IDX_MAX]; +	struct pci_dev *pci_dev; +	struct intel_uncore_pmu *pmu; +	struct hrtimer hrtimer; +	struct list_head list; +	struct intel_uncore_extra_reg shared_regs[0]; +}; + +#define UNCORE_BOX_FLAG_INITIATED	0 + +struct uncore_event_desc { +	struct kobj_attribute attr; +	const char *config; +}; + +#define INTEL_UNCORE_EVENT_DESC(_name, _config)			\ +{								\ +	.attr	= __ATTR(_name, 0444, uncore_event_show, NULL),	\ +	.config	= _config,					\ +} + +#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\ +static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\ +				struct kobj_attribute *attr,		\ +				char *page)				\ +{									\ +	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\ +	return sprintf(page, _format "\n");				\ +}									\ +static struct kobj_attribute format_attr_##_var =			\ +	__ATTR(_name, 0444, __uncore_##_var##_show, NULL) + + +static ssize_t uncore_event_show(struct kobject *kobj, +				struct kobj_attribute *attr, char *buf) +{ +	struct uncore_event_desc *event = +		container_of(attr, struct uncore_event_desc, attr); +	return sprintf(buf, "%s", event->config); +} + +static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) +{ +	return box->pmu->type->box_ctl; +} + +static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctl; +} + +static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr; +} + +static inline +unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) +{ +	return idx * 4 + box->pmu->type->event_ctl; +} + +static inline +unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	return idx * 8 + box->pmu->type->perf_ctr; +} + +static inline +unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) +{ +	if (!box->pmu->type->box_ctl) +		return 0; +	return box->pmu->type->box_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) +{ +	if (!box->pmu->type->fixed_ctl) +		return 0; +	return box->pmu->type->fixed_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) +{ +	return idx + box->pmu->type->event_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	return idx + box->pmu->type->perf_ctr + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_fixed_ctl(struct intel_uncore_box *box) +{ +	if (box->pci_dev) +		return uncore_pci_fixed_ctl(box); +	else +		return uncore_msr_fixed_ctl(box); +} + +static inline +unsigned uncore_fixed_ctr(struct intel_uncore_box *box) +{ +	if (box->pci_dev) +		return uncore_pci_fixed_ctr(box); +	else +		return 
uncore_msr_fixed_ctr(box); +} + +static inline +unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) +{ +	if (box->pci_dev) +		return uncore_pci_event_ctl(box, idx); +	else +		return uncore_msr_event_ctl(box, idx); +} + +static inline +unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	if (box->pci_dev) +		return uncore_pci_perf_ctr(box, idx); +	else +		return uncore_msr_perf_ctr(box, idx); +} + +static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) +{ +	return box->pmu->type->perf_ctr_bits; +} + +static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr_bits; +} + +static inline int uncore_num_counters(struct intel_uncore_box *box) +{ +	return box->pmu->type->num_counters; +} + +static inline void uncore_disable_box(struct intel_uncore_box *box) +{ +	if (box->pmu->type->ops->disable_box) +		box->pmu->type->ops->disable_box(box); +} + +static inline void uncore_enable_box(struct intel_uncore_box *box) +{ +	if (box->pmu->type->ops->enable_box) +		box->pmu->type->ops->enable_box(box); +} + +static inline void uncore_disable_event(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	box->pmu->type->ops->disable_event(box, event); +} + +static inline void uncore_enable_event(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	box->pmu->type->ops->enable_event(box, event); +} + +static inline u64 uncore_read_counter(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	return box->pmu->type->ops->read_counter(box, event); +} + +static inline void uncore_box_init(struct intel_uncore_box *box) +{ +	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { +		if (box->pmu->type->ops->init_box) +			box->pmu->type->ops->init_box(box); +	} +} diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 47124a73dd7..92c7e39a079 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)  	 * So at moment let leave metrics turned on forever -- it's  	 * ok for now but need to be revisited!  	 
* -	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); -	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); +	 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); +	 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);  	 */  } @@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)  	 * state we need to clear P4_CCCR_OVF, otherwise interrupt get  	 * asserted again and again  	 */ -	(void)checking_wrmsrl(hwc->config_base, +	(void)wrmsrl_safe(hwc->config_base,  		(u64)(p4_config_unpack_cccr(hwc->config)) &  			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);  } @@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config)  	bind = &p4_pebs_bind_map[idx]; -	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs); -	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert); +	(void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs); +	(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);  }  static void p4_pmu_enable_event(struct perf_event *event) @@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event)  	 */  	p4_pmu_enable_pebs(hwc->config); -	(void)checking_wrmsrl(escr_addr, escr_conf); -	(void)checking_wrmsrl(hwc->config_base, +	(void)wrmsrl_safe(escr_addr, escr_conf); +	(void)wrmsrl_safe(hwc->config_base,  				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);  } @@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void)  	unsigned int low, high;  	/* If we get stripped -- indexing fails */ -	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); +	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);  	rdmsr(MSR_IA32_MISC_ENABLE, low, high);  	if (!(low & (1 << 7))) { diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 32bcfc7dd23..e4dd0f7a045 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event)  	if (cpuc->enabled)  		val |= ARCH_PERFMON_EVENTSEL_ENABLE; -	(void)checking_wrmsrl(hwc->config_base, val); +	(void)wrmsrl_safe(hwc->config_base, val);  }  static void p6_pmu_enable_event(struct perf_event *event) @@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event)  	if (cpuc->enabled)  		val |= ARCH_PERFMON_EVENTSEL_ENABLE; -	(void)checking_wrmsrl(hwc->config_base, val); +	(void)wrmsrl_safe(hwc->config_base, val);  }  PMU_FORMAT_ATTR(event,	"config:0-7"	); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81ed..ae42418bc50 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -27,8 +27,8 @@ static int die_counter;  void printk_address(unsigned long address, int reliable)  { -	printk(" [<%p>] %s%pB\n", (void *) address, -			reliable ? "" : "? ", (void *) address); +	pr_cont(" [<%p>] %s%pB\n", +		(void *)address, reliable ? "" : "? 
", (void *)address);  }  #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)  			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)  		return 1; +	print_modules();  	show_regs(regs);  #ifdef CONFIG_X86_32  	if (user_mode_vm(regs)) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daa..1038a417ea5 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,  		if (kstack_end(stack))  			break;  		if (i && ((i % STACKSLOTS_PER_LINE) == 0)) -			printk(KERN_CONT "\n"); -		printk(KERN_CONT " %08lx", *stack++); +			pr_cont("\n"); +		pr_cont(" %08lx", *stack++);  		touch_nmi_watchdog();  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  	show_trace_log_lvl(task, regs, sp, bp, log_lvl);  } @@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs)  {  	int i; -	print_modules();  	__show_regs(regs, !user_mode_vm(regs)); -	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", -		TASK_COMM_LEN, current->comm, task_pid_nr(current), -		current_thread_info(), current, task_thread_info(current)); +	pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", +		 TASK_COMM_LEN, current->comm, task_pid_nr(current), +		 current_thread_info(), current, task_thread_info(current));  	/*  	 * When in-kernel, we also print out the stack and code at the  	 * time of the fault.. @@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs)  		unsigned char c;  		u8 *ip; -		printk(KERN_EMERG "Stack:\n"); +		pr_emerg("Stack:\n");  		show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); -		printk(KERN_EMERG "Code: "); +		pr_emerg("Code:");  		ip = (u8 *)regs->ip - code_prologue;  		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { @@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs)  		for (i = 0; i < code_len; i++, ip++) {  			if (ip < (u8 *)PAGE_OFFSET ||  					probe_kernel_address(ip, c)) { -				printk(KERN_CONT " Bad EIP value."); +				pr_cont("  Bad EIP value.");  				break;  			}  			if (ip == (u8 *)regs->ip) -				printk(KERN_CONT "<%02x> ", c); +				pr_cont(" <%02x>", c);  			else -				printk(KERN_CONT "%02x ", c); +				pr_cont(" %02x", c);  		}  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa..b653675d528 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,  		if (stack >= irq_stack && stack <= irq_stack_end) {  			if (stack == irq_stack_end) {  				stack = (unsigned long *) (irq_stack_end[-1]); -				printk(KERN_CONT " <EOI> "); +				pr_cont(" <EOI> ");  			}  		} else {  		if (((long) stack & (THREAD_SIZE-1)) == 0)  			break;  		}  		if (i && ((i % STACKSLOTS_PER_LINE) == 0)) -			printk(KERN_CONT "\n"); -		printk(KERN_CONT " %016lx", *stack++); +			pr_cont("\n"); +		pr_cont(" %016lx", *stack++);  		touch_nmi_watchdog();  	}  	preempt_enable(); -	printk(KERN_CONT "\n"); +	pr_cont("\n");  	show_trace_log_lvl(task, regs, sp, bp, log_lvl);  } @@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs)  	sp = regs->sp;  	printk("CPU %d ", cpu); -	print_modules();  	__show_regs(regs, 1); -	printk("Process %s (pid: %d, threadinfo %p, task %p)\n", -		cur->comm, cur->pid, task_thread_info(cur), cur); +	printk(KERN_DEFAULT 
"Process %s (pid: %d, threadinfo %p, task %p)\n", +	       cur->comm, cur->pid, task_thread_info(cur), cur);  	/*  	 * When in-kernel, we also print out the stack and code at the @@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs)  		for (i = 0; i < code_len; i++, ip++) {  			if (ip < (u8 *)PAGE_OFFSET ||  					probe_kernel_address(ip, c)) { -				printk(KERN_CONT " Bad RIP value."); +				pr_cont(" Bad RIP value.");  				break;  			}  			if (ip == (u8 *)regs->ip) -				printk(KERN_CONT "<%02x> ", c); +				pr_cont("<%02x> ", c);  			else -				printk(KERN_CONT "%02x ", c); +				pr_cont("%02x ", c);  		}  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51b..111f6bbd8b3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1758,10 +1758,30 @@ end_repeat_nmi:  	 */  	call save_paranoid  	DEFAULT_FRAME 0 + +	/* +	 * Save off the CR2 register. If we take a page fault in the NMI then +	 * it could corrupt the CR2 value. If the NMI preempts a page fault +	 * handler before it was able to read the CR2 register, and then the +	 * NMI itself takes a page fault, the page fault that was preempted +	 * will read the information from the NMI page fault and not the +	 * origin fault. Save it off and restore it if it changes. +	 * Use the r12 callee-saved register. +	 */ +	movq %cr2, %r12 +  	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */  	movq %rsp,%rdi  	movq $-1,%rsi  	call do_nmi + +	/* Did the NMI take a page fault? Restore cr2 if it did */ +	movq %cr2, %rcx +	cmpq %rcx, %r12 +	je 1f +	movq %r12, %cr2 +1: +	  	testl %ebx,%ebx				/* swapgs needed? */  	jnz nmi_restore  nmi_swapgs: diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7..1f5f1d5d2a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -294,9 +294,9 @@ void fixup_irqs(void)  		raw_spin_unlock(&desc->lock);  		if (break_affinity && set_affinity) -			printk("Broke affinity for irq %i\n", irq); +			pr_notice("Broke affinity for irq %i\n", irq);  		else if (!set_affinity) -			printk("Cannot set affinity for irq %i\n", irq); +			pr_notice("Cannot set affinity for irq %i\n", irq);  	}  	/* diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc691718..4873e62db6a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -87,6 +87,7 @@  #include <asm/microcode.h>  #include <asm/processor.h>  #include <asm/cpu_device_id.h> +#include <asm/perf_event.h>  MODULE_DESCRIPTION("Microcode Update Driver");  MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu)  	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;  	int err = 0; -	mutex_lock(µcode_mutex);  	if (uci->valid) {  		enum ucode_state ustate; @@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu)  			if (ustate == UCODE_ERROR)  				err = -EINVAL;  	} -	mutex_unlock(µcode_mutex);  	return err;  } @@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev,  			    const char *buf, size_t size)  {  	unsigned long val; -	int cpu = dev->id; -	ssize_t ret = 0; +	int cpu; +	ssize_t ret = 0, tmp_ret;  	ret = kstrtoul(buf, 0, &val);  	if (ret)  		return ret; -	if (val == 1) { -		get_online_cpus(); -		if (cpu_online(cpu)) -			ret = reload_for_cpu(cpu); -		put_online_cpus(); +	if (val != 1) +		return size; + +	get_online_cpus(); +	mutex_lock(µcode_mutex); +	
for_each_online_cpu(cpu) { +		tmp_ret = reload_for_cpu(cpu); +		if (tmp_ret != 0) +			pr_warn("Error reloading microcode on CPU %d\n", cpu); + +		/* save retval of the first encountered reload error */ +		if (!ret) +			ret = tmp_ret;  	} +	if (!ret) +		perf_check_microcode(); +	mutex_unlock(&microcode_mutex); +	put_online_cpus();  	if (!ret)  		ret = size; @@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL);  static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);  static struct attribute *mc_default_attrs[] = { -	&dev_attr_reload.attr,  	&dev_attr_version.attr,  	&dev_attr_processor_flags.attr,  	NULL @@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {  #ifdef MODULE  /* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id microcode_id[] = { +static const struct x86_cpu_id __initconst microcode_id[] = {  #ifdef CONFIG_MICROCODE_INTEL  	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },  #endif @@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = {  MODULE_DEVICE_TABLE(x86cpu, microcode_id);  #endif +static struct attribute *cpu_root_microcode_attrs[] = { +	&dev_attr_reload.attr, +	NULL +}; + +static struct attribute_group cpu_root_microcode_group = { +	.name  = "microcode", +	.attrs = cpu_root_microcode_attrs, +}; +  static int __init microcode_init(void)  {  	struct cpuinfo_x86 *c = &cpu_data(0); @@ -540,16 +560,25 @@ static int __init microcode_init(void)  	mutex_lock(&microcode_mutex);  	error = subsys_interface_register(&mc_cpu_interface); - +	if (!error) +		perf_check_microcode();  	mutex_unlock(&microcode_mutex);  	put_online_cpus();  	if (error)  		goto out_pdev; +	error = sysfs_create_group(&cpu_subsys.dev_root->kobj, +				   &cpu_root_microcode_group); + +	if (error) { +		pr_err("Error creating microcode group!\n"); +		goto out_driver; +	} +  	error = microcode_dev_init();  	if (error) -		goto out_driver; +		goto out_ucode_group;  	register_syscore_ops(&mc_syscore_ops);  	register_hotcpu_notifier(&mc_cpu_notifier); @@ -559,7 +588,11 @@ static int __init microcode_init(void)  	return 0; -out_driver: + out_ucode_group: +	sysfs_remove_group(&cpu_subsys.dev_root->kobj, +			   &cpu_root_microcode_group); + + out_driver:  	get_online_cpus();  	mutex_lock(&microcode_mutex); @@ -568,7 +601,7 @@ out_driver:  	mutex_unlock(&microcode_mutex);  	put_online_cpus(); -out_pdev: + out_pdev:  	platform_device_unregister(microcode_pdev);  	return error; @@ -584,6 +617,9 @@ static void __exit microcode_exit(void)  	unregister_hotcpu_notifier(&mc_cpu_notifier);  	unregister_syscore_ops(&mc_syscore_ops); +	sysfs_remove_group(&cpu_subsys.dev_root->kobj, +			   &cpu_root_microcode_group); +  	get_online_cpus();  	mutex_lock(&microcode_mutex); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac89..202494d2ec6 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -15,6 +15,9 @@      along with this program; if not, write to the Free Software      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/moduleloader.h>  #include <linux/elf.h>  #include <linux/vmalloc.h> @@ -30,9 +33,14 @@  #include <asm/pgtable.h>  #if 0 -#define DEBUGP printk +#define DEBUGP(fmt, ...)				\ +	printk(KERN_DEBUG fmt, ##__VA_ARGS__)  #else -#define DEBUGP(fmt...) +#define DEBUGP(fmt, ...)				
\ +do {							\ +	if (0)						\ +		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\ +} while (0)  #endif  void *module_alloc(unsigned long size) @@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,  	Elf32_Sym *sym;  	uint32_t *location; -	DEBUGP("Applying relocate section %u to %u\n", relsec, -	       sechdrs[relsec].sh_info); +	DEBUGP("Applying relocate section %u to %u\n", +	       relsec, sechdrs[relsec].sh_info);  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {  		/* This is where to make the change */  		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,  			*location += sym->st_value - (uint32_t)location;  			break;  		default: -			printk(KERN_ERR "module %s: Unknown relocation: %u\n", +			pr_err("%s: Unknown relocation: %u\n",  			       me->name, ELF32_R_TYPE(rel[i].r_info));  			return -ENOEXEC;  		} @@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	void *loc;  	u64 val; -	DEBUGP("Applying relocate section %u to %u\n", relsec, -	       sechdrs[relsec].sh_info); +	DEBUGP("Applying relocate section %u to %u\n", +	       relsec, sechdrs[relsec].sh_info);  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {  		/* This is where to make the change */  		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			+ ELF64_R_SYM(rel[i].r_info);  		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", -			(int)ELF64_R_TYPE(rel[i].r_info), -			sym->st_value, rel[i].r_addend, (u64)loc); +		       (int)ELF64_R_TYPE(rel[i].r_info), +		       sym->st_value, rel[i].r_addend, (u64)loc);  		val = sym->st_value + rel[i].r_addend; @@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  #endif  			break;  		default: -			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", +			pr_err("%s: Unknown rela relocation: %llu\n",  			       me->name, ELF64_R_TYPE(rel[i].r_info));  			return -ENOEXEC;  		} @@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	return 0;  overflow: -	printk(KERN_ERR "overflow in relocation type %d val %Lx\n", +	pr_err("overflow in relocation type %d val %Lx\n",  	       (int)ELF64_R_TYPE(rel[i].r_info), val); -	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", +	pr_err("`%s' likely not compiled with -mcmodel=kernel\n",  	       me->name);  	return -ENOEXEC;  } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457b..f84f5c57de3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)  #ifdef CONFIG_X86_32  /*   * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. + * add a workaround to the iret problem in C (preventing nested + * NMIs if an NMI takes a trap). Simply have 3 states the NMI + * can be in:   *   *  1) not running   *  2) executing @@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)   * If an NMI hits a breakpoint that executes an iret, another   * NMI can preempt it. We do not want to allow this new NMI   * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. 
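+ * (The three states below also serve as a per-cpu nesting counter: + * 0 == not running, 1 == executing, 2 == latched.) + *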
+ * We set the state to "latched", and the exit of the first NMI will + * perform a dec_return; if the result is zero (NOT_RUNNING), + * it will simply exit the NMI handler. If not, the dec_return + * would have set the state to NMI_EXECUTING (what we want it to + * be when we are running). In this case, we simply jump back + * to rerun the NMI handler again, and restart the 'latched' NMI. + * + * No trap (breakpoint or page fault) should be hit before nmi_restart, + * thus there is no race between the first check of state for NOT_RUNNING + * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs + * at this point. + * + * In case the NMI takes a page fault, we need to save off the CR2 + * because the NMI could have preempted another page fault and corrupted + * the CR2 that is about to be read. As nested NMIs must be restarted + * and they cannot take breakpoints or page faults, the update of the + * CR2 must be done before converting the nmi state back to NOT_RUNNING. + * Otherwise, there would be a race of another nested NMI coming in + * after setting state to NOT_RUNNING but before updating the nmi_cr2.   */  enum nmi_states { -	NMI_NOT_RUNNING, +	NMI_NOT_RUNNING = 0,  	NMI_EXECUTING,  	NMI_LATCHED,  };  static DEFINE_PER_CPU(enum nmi_states, nmi_state); +static DEFINE_PER_CPU(unsigned long, nmi_cr2);  #define nmi_nesting_preprocess(regs)					\  	do {								\ -		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\ -			__get_cpu_var(nmi_state) = NMI_LATCHED;		\ +		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\ +			this_cpu_write(nmi_state, NMI_LATCHED);		\  			return;						\  		}							\ -	nmi_restart:							\ -		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\ -	} while (0) +		this_cpu_write(nmi_state, NMI_EXECUTING);		\ +		this_cpu_write(nmi_cr2, read_cr2());			\ +	} while (0);							\ +	nmi_restart:  #define nmi_nesting_postprocess()					\  	do {								\ -		if (cmpxchg(&__get_cpu_var(nmi_state),			\ -		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\ +		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\ +			write_cr2(this_cpu_read(nmi_cr2));		\ +		if (this_cpu_dec_return(nmi_state))			\  			goto nmi_restart;				\  	} while (0)  #else /* x86_64 */ diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 149b8d9c6ad..6d9582ec032 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)  static void __init init_nmi_testsuite(void)  {  	/* trap all the unknown NMIs we may generate */ -	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); +	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", +			__initdata);  }  static void __init cleanup_nmi_testsuite(void) @@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)  {  	unsigned long timeout; -	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, -				 NMI_FLAG_FIRST, "nmi_selftest")) { +	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, +				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {  		nmi_fail = FAILURE;  		return;  	} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9ce885996fd..17fff18a103 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = {  #endif  	.wbinvd = native_wbinvd,  	.read_msr = native_read_msr_safe, -	.rdmsr_regs = native_rdmsr_safe_regs,  	.write_msr = native_write_msr_safe, -	
.wrmsr_regs = native_wrmsr_safe_regs,  	.read_tsc = native_read_tsc,  	.read_pmc = native_read_pmc,  	.read_tscp = native_read_tscp, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64..299d49302e7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -22,6 +22,8 @@   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   */ +#define pr_fmt(fmt) "Calgary: " fmt +  #include <linux/kernel.h>  #include <linux/init.h>  #include <linux/types.h> @@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,  		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,  					  npages, 0, boundary_size, 0);  		if (offset == ~0UL) { -			printk(KERN_WARNING "Calgary: IOMMU full.\n"); +			pr_warn("IOMMU full\n");  			spin_unlock_irqrestore(&tbl->it_lock, flags);  			if (panic_on_overflow)  				panic("Calgary: fix the allocator.\n"); @@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  	entry = iommu_range_alloc(dev, tbl, npages);  	if (unlikely(entry == DMA_ERROR_CODE)) { -		printk(KERN_WARNING "Calgary: failed to allocate %u pages in " -		       "iommu %p\n", npages, tbl); +		pr_warn("failed to allocate %u pages in iommu %p\n", +			npages, tbl);  		return DMA_ERROR_CODE;  	} @@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)  		i++;  	} while ((val & 0xff) != 0xff && i < 100);  	if (i == 100) -		printk(KERN_WARNING "Calgary: PCI bus not quiesced, " -		       "continuing anyway\n"); +		pr_warn("PCI bus not quiesced, continuing anyway\n");  	/* invalidate TCE cache */  	target = calgary_reg(bbar, tar_offset(tbl->it_busno)); @@ -604,8 +605,7 @@ begin:  		i++;  	} while ((val64 & 0xff) != 0xff && i < 100);  	if (i == 100) -		printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " -		       "continuing anyway\n"); +		pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");  	/* 3. 
poll Page Migration DEBUG for SoftStopFault */  	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); @@ -617,8 +617,7 @@ begin:  		if (++count < 100)  			goto begin;  		else { -			printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " -			       "aborting TCE cache flush sequence!\n"); +			pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");  			return; /* pray for the best */  		}  	} @@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)  	plssr = be32_to_cpu(readl(target));  	/* If no error, the agent ID in the CSR is not valid */ -	printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " -	       "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); +	pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", +		 tbl->it_busno, csr, plssr);  }  static void calioc2_dump_error_regs(struct iommu_table *tbl) @@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)  	target = calgary_reg(bbar, phboff | 0x800);  	mck = be32_to_cpu(readl(target)); -	printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", -	       tbl->it_busno); +	pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); -	printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", -	       csr, plssr, csmr, mck); +	pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", +		 csr, plssr, csmr, mck);  	/* dump rest of error regs */ -	printk(KERN_EMERG "Calgary: "); +	pr_emerg("");  	for (i = 0; i < ARRAY_SIZE(errregs); i++) {  		/* err regs are at 0x810 - 0x870 */  		erroff = (0x810 + (i * 0x10));  		target = calgary_reg(bbar, phboff | erroff);  		errregs[i] = be32_to_cpu(readl(target)); -		printk("0x%08x@0x%lx ", errregs[i], erroff); +		pr_cont("0x%08x@0x%lx ", errregs[i], erroff);  	} -	printk("\n"); +	pr_cont("\n");  	/* root complex status */  	target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e5..ef6a8456f71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/errno.h>  #include <linux/kernel.h>  #include <linux/mm.h> @@ -145,16 +147,14 @@ void show_regs_common(void)  	/* Board Name is optional */  	board = dmi_get_system_info(DMI_BOARD_NAME); -	printk(KERN_CONT "\n"); -	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", -		current->pid, current->comm, print_tainted(), -		init_utsname()->release, -		(int)strcspn(init_utsname()->version, " "), -		init_utsname()->version); -	printk(KERN_CONT " %s %s", vendor, product); -	if (board) -		printk(KERN_CONT "/%s", board); -	printk(KERN_CONT "\n"); +	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", +	       current->pid, current->comm, print_tainted(), +	       init_utsname()->release, +	       (int)strcspn(init_utsname()->version, " "), +	       init_utsname()->version, +	       vendor, product, +	       board ? "/" : "", +	       board ? 
board : "");  }  void flush_thread(void) @@ -645,7 +645,7 @@ static void amd_e400_idle(void)  			amd_e400_c1e_detected = true;  			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))  				mark_tsc_unstable("TSC halt in AMD C1E"); -			printk(KERN_INFO "System has AMD C1E enabled\n"); +			pr_info("System has AMD C1E enabled\n");  		}  	} @@ -659,8 +659,7 @@ static void amd_e400_idle(void)  			 */  			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,  					   &cpu); -			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", -			       cpu); +			pr_info("Switch to broadcast mode on CPU%d\n", cpu);  		}  		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); @@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)  {  #ifdef CONFIG_SMP  	if (pm_idle == poll_idle && smp_num_siblings > 1) { -		printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," -			" performance may degrade.\n"); +		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");  	}  #endif  	if (pm_idle) @@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)  		/*  		 * One CPU supports mwait => All CPUs supports mwait  		 */ -		printk(KERN_INFO "using mwait in idle threads.\n"); +		pr_info("using mwait in idle threads\n");  		pm_idle = mwait_idle;  	} else if (cpu_has_amd_erratum(amd_erratum_400)) {  		/* E400: APIC timer interrupt does not wake up CPU from C1e */ -		printk(KERN_INFO "using AMD E400 aware idle routine\n"); +		pr_info("using AMD E400 aware idle routine\n");  		pm_idle = amd_e400_idle;  	} else  		pm_idle = default_idle; @@ -715,7 +713,7 @@ static int __init idle_setup(char *str)  		return -EINVAL;  	if (!strcmp(str, "poll")) { -		printk("using polling idle threads.\n"); +		pr_info("using polling idle threads\n");  		pm_idle = poll_idle;  		boot_option_idle_override = IDLE_POLL;  	} else if (!strcmp(str, "mwait")) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf09..0a980c9d7cb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)  {  	if (dead_task->mm) {  		if (dead_task->mm->context.size) { -			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", -					dead_task->comm, -					dead_task->mm->context.ldt, -					dead_task->mm->context.size); +			pr_warn("WARNING: dead process %8s still has LDT? 
<%p/%d>\n", +				dead_task->comm, +				dead_task->mm->context.ldt, +				dead_task->mm->context.size);  			BUG();  		}  	} @@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)  			task->thread.gs = addr;  			if (doit) {  				load_gs_index(0); -				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); +				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);  			}  		}  		put_cpu(); @@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)  				/* set the selector to 0 to not confuse  				   __switch_to */  				loadsegment(fs, 0); -				ret = checking_wrmsrl(MSR_FS_BASE, addr); +				ret = wrmsrl_safe(MSR_FS_BASE, addr);  			}  		}  		put_cpu(); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5de92f1abd7..52190a938b4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h>  #include <linux/reboot.h>  #include <linux/init.h> @@ -20,14 +22,12 @@  #include <asm/virtext.h>  #include <asm/cpu.h>  #include <asm/nmi.h> +#include <asm/smp.h> -#ifdef CONFIG_X86_32 -# include <linux/ctype.h> -# include <linux/mc146818rtc.h> -# include <asm/realmode.h> -#else -# include <asm/x86_init.h> -#endif +#include <linux/ctype.h> +#include <linux/mc146818rtc.h> +#include <asm/realmode.h> +#include <asm/x86_init.h>  /*   * Power off function, if any @@ -49,7 +49,7 @@ int reboot_force;   */  static int reboot_default = 1; -#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP  static int reboot_cpu = -1;  #endif @@ -67,8 +67,8 @@ bool port_cf9_safe = false;   * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]   * warm   Don't set the cold reboot flag   * cold   Set the cold reboot flag - * bios   Reboot by jumping through the BIOS (only for X86_32) - * smp    Reboot by executing reset on BSP or other CPU (only for X86_32) + * bios   Reboot by jumping through the BIOS + * smp    Reboot by executing reset on BSP or other CPU   * triple Force a triple fault (init)   * kbd    Use the keyboard controller. cold reset (default)   * acpi   Use the RESET_REG in the FADT @@ -95,7 +95,6 @@ static int __init reboot_setup(char *str)  			reboot_mode = 0;  			break; -#ifdef CONFIG_X86_32  #ifdef CONFIG_SMP  		case 's':  			if (isdigit(*(str+1))) { @@ -112,7 +111,6 @@  #endif /* CONFIG_SMP */  		case 'b': -#endif  		case 'a':  		case 'k':  		case 't': @@ -138,7 +136,6 @@  __setup("reboot=", reboot_setup); -#ifdef CONFIG_X86_32  /*   * Reboot options and system auto-detection code provided by   * Dell Inc. so their systems "just work". :-)   */ @@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_BIOS) {  		reboot_type = BOOT_BIOS; -		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboots.\n", +			d->ident, "BIOS");  	}  	return 0;  } -void machine_real_restart(unsigned int type) +void __noreturn machine_real_restart(unsigned int type)  { -	void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) -		real_mode_header->machine_real_restart_asm; -  	local_irq_disable();  	/* @@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type)  	/*  	 * Switch back to the initial page table. 	 
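+	 * On 32-bit this means reloading initial_page_table; the 64-bit +	 * path below must instead switch to the trampoline page table, +	 * which keeps the low-memory real-mode trampoline mapped. 	 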
*/ +#ifdef CONFIG_X86_32  	load_cr3(initial_page_table); - -	/* -	 * Write 0x1234 to absolute memory location 0x472.  The BIOS reads -	 * this on booting to tell it to "Bypass memory test (also warm -	 * boot)".  This seems like a fairly standard thing that gets set by -	 * REBOOT.COM programs, and the previous reset routine did this -	 * too. */ -	*((unsigned short *)0x472) = reboot_mode; +#else +	write_cr3(real_mode_header->trampoline_pgd); +#endif  	/* Jump to the identity-mapped low memory code */ +#ifdef CONFIG_X86_32 +	asm volatile("jmpl *%0" : : +		     "rm" (real_mode_header->machine_real_restart_asm), +		     "a" (type)); +#else +	asm volatile("ljmpl *%0" : : +		     "m" (real_mode_header->machine_real_restart_asm), +		     "D" (type)); +#endif +	unreachable(); +}  #ifdef CONFIG_APM_MODULE  EXPORT_SYMBOL(machine_real_restart);  #endif -#endif /* CONFIG_X86_32 */ -  /*   * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot   */ @@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_CF9) {  		reboot_type = BOOT_CF9; -		printk(KERN_INFO "%s series board detected. " -		       "Selecting PCI-method for reboots.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboots.\n", +			d->ident, "PCI");  	}  	return 0;  } @@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_KBD) {  		reboot_type = BOOT_KBD; -		printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboot.\n", +			d->ident, "KBD");  	}  	return 0;  }  /* - * This is a single dmi_table handling all reboot quirks.  Note that - * REBOOT_BIOS is only available for 32bit + * This is a single dmi_table handling all reboot quirks.   
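+ * Each entry overrides reboot_type (to BIOS, PCI or KBD) on boards + * whose default reboot method is known not to work.  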
*/  static struct dmi_system_id __initdata reboot_dmi_table[] = { -#ifdef CONFIG_X86_32  	{	/* Handle problems with rebooting on Dell E520's */  		.callback = set_bios_reboot,  		.ident = "Dell E520", @@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {  			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),  		},  	}, -#endif /* CONFIG_X86_32 */  	{	/* Handle reboot issue on Acer Aspire one */  		.callback = set_kbd_reboot, @@ -584,13 +580,11 @@ static void native_machine_emergency_restart(void)  			reboot_type = BOOT_KBD;  			break; -#ifdef CONFIG_X86_32  		case BOOT_BIOS:  			machine_real_restart(MRR_BIOS);  			reboot_type = BOOT_KBD;  			break; -#endif  		case BOOT_ACPI:  			acpi_reboot(); @@ -632,12 +626,10 @@ void native_machine_shutdown(void)  	/* The boot cpu is always logical cpu 0 */  	int reboot_cpu_id = 0; -#ifdef CONFIG_X86_32  	/* See if there has been given a command line override */  	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&  		cpu_online(reboot_cpu))  		reboot_cpu_id = reboot_cpu; -#endif  	/* Make certain the cpu I'm about to reboot on is online */  	if (!cpu_online(reboot_cpu_id)) @@ -678,7 +670,7 @@ static void __machine_emergency_restart(int emergency)  static void native_machine_restart(char *__unused)  { -	printk("machine restart\n"); +	pr_notice("machine restart\n");  	if (!reboot_force)  		machine_shutdown(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16be6dc14db..f4b9b80e1b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p)  	x86_init.timers.wallclock_init(); -	x86_platform.wallclock_init(); -  	mcheck_init();  	arch_init_ideal_nops(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 21af737053a..b280908a376 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,6 +6,9 @@   *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes   *  2000-2002   x86-64 support by Andi Kleen   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/sched.h>  #include <linux/mm.h>  #include <linux/smp.h> @@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)  		       me->comm, me->pid, where, frame,  		       regs->ip, regs->sp, regs->orig_ax);  		print_vma_addr(" in ", regs->ip); -		printk(KERN_CONT "\n"); +		pr_cont("\n");  	}  	force_sig(SIGSEGV, me); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7bd8a082365..c1a310fb830 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,4 +1,4 @@ -/* + /*   *	x86 SMP booting functions   *   *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> @@ -39,6 +39,8 @@   *	Glauber Costa		:	i386 and x86_64 integration   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/init.h>  #include <linux/smp.h>  #include <linux/module.h> @@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)  	 * boards)  	 */ -	pr_debug("CALLIN, before setup_local_APIC().\n"); +	pr_debug("CALLIN, before setup_local_APIC()\n");  	if (apic->smp_callin_clear_local_apic)  		apic->smp_callin_clear_local_apic();  	setup_local_APIC(); @@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused)  	check_tsc_sync_target();  	/* -	 * We need to hold call_lock, so there is no inconsistency -	 * between the time smp_call_function() determines number of -	 * IPI recipients, and the time when the determination is made -	 * for which cpus receive the IPI. 
Holding this -	 * lock helps us to not include this cpu in a currently in progress -	 * smp_call_function(). -	 *  	 * We need to hold vector_lock so there the set of online cpus  	 * does not change while we are assigning vectors to cpus.  Holding  	 * this lock ensures we don't half assign or remove an irq from a cpu.  	 */ -	ipi_call_lock();  	lock_vector_lock();  	set_cpu_online(smp_processor_id(), true);  	unlock_vector_lock(); -	ipi_call_unlock();  	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;  	x86_platform.nmi_init(); @@ -432,17 +425,16 @@ static void impress_friends(void)  	/*  	 * Allow the user to impress friends.  	 */ -	pr_debug("Before bogomips.\n"); +	pr_debug("Before bogomips\n");  	for_each_possible_cpu(cpu)  		if (cpumask_test_cpu(cpu, cpu_callout_mask))  			bogosum += cpu_data(cpu).loops_per_jiffy; -	printk(KERN_INFO -		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n", +	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",  		num_online_cpus(),  		bogosum/(500000/HZ),  		(bogosum/(5000/HZ))%100); -	pr_debug("Before bogocount - setting activated=1.\n"); +	pr_debug("Before bogocount - setting activated=1\n");  }  void __inquire_remote_apic(int apicid) @@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid)  	int timeout;  	u32 status; -	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); +	pr_info("Inquiring remote APIC 0x%x...\n", apicid);  	for (i = 0; i < ARRAY_SIZE(regs); i++) { -		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); +		pr_info("... APIC 0x%x %s: ", apicid, names[i]);  		/*  		 * Wait for idle.  		 */  		status = safe_apic_wait_icr_idle();  		if (status) -			printk(KERN_CONT -			       "a previous APIC delivery may have failed\n"); +			pr_cont("a previous APIC delivery may have failed\n");  		apic_icr_write(APIC_DM_REMRD | regs[i], apicid); @@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid)  		switch (status) {  		case APIC_ICR_RR_VALID:  			status = apic_read(APIC_RRR); -			printk(KERN_CONT "%08x\n", status); +			pr_cont("%08x\n", status);  			break;  		default: -			printk(KERN_CONT "failed\n"); +			pr_cont("failed\n");  		}  	}  } @@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)  			apic_write(APIC_ESR, 0);  		accept_status = (apic_read(APIC_ESR) & 0xEF);  	} -	pr_debug("NMI sent.\n"); +	pr_debug("NMI sent\n");  	if (send_status) -		printk(KERN_ERR "APIC never delivered???\n"); +		pr_err("APIC never delivered???\n");  	if (accept_status) -		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); +		pr_err("APIC delivery error (%lx)\n", accept_status);  	return (send_status | accept_status);  } @@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		apic_read(APIC_ESR);  	} -	pr_debug("Asserting INIT.\n"); +	pr_debug("Asserting INIT\n");  	/*  	 * Turn INIT on target chip @@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  	mdelay(10); -	pr_debug("Deasserting INIT.\n"); +	pr_debug("Deasserting INIT\n");  	/* Target chip */  	/* Send IPI */ @@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  	/*  	 * Run STARTUP IPI loop.  	 */ -	pr_debug("#startup loops: %d.\n", num_starts); +	pr_debug("#startup loops: %d\n", num_starts);  	for (j = 1; j <= num_starts; j++) { -		pr_debug("Sending STARTUP #%d.\n", j); +		pr_debug("Sending STARTUP #%d\n", j);  		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  
*/  			apic_write(APIC_ESR, 0);  		apic_read(APIC_ESR); -		pr_debug("After apic_write.\n"); +		pr_debug("After apic_write\n");  		/*  		 * STARTUP IPI @@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		 */  		udelay(300); -		pr_debug("Startup point 1.\n"); +		pr_debug("Startup point 1\n");  		pr_debug("Waiting for send to finish...\n");  		send_status = safe_apic_wait_icr_idle(); @@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		if (send_status || accept_status)  			break;  	} -	pr_debug("After Startup.\n"); +	pr_debug("After Startup\n");  	if (send_status) -		printk(KERN_ERR "APIC never delivered???\n"); +		pr_err("APIC never delivered???\n");  	if (accept_status) -		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); +		pr_err("APIC delivery error (%lx)\n", accept_status);  	return (send_status | accept_status);  } @@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)  	if (system_state == SYSTEM_BOOTING) {  		if (node != current_node) {  			if (current_node > (-1)) -				pr_cont(" Ok.\n"); +				pr_cont(" OK\n");  			current_node = node;  			pr_info("Booting Node %3d, Processors ", node);  		} -		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); +		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");  		return;  	} else  		pr_info("Booting Node %d Processor %d APIC 0x%x\n", @@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)  		/*  		 * allow APs to start initializing.  		 */ -		pr_debug("Before Callout %d.\n", cpu); +		pr_debug("Before Callout %d\n", cpu);  		cpumask_set_cpu(cpu, cpu_callout_mask); -		pr_debug("After Callout %d.\n", cpu); +		pr_debug("After Callout %d\n", cpu);  		/*  		 * Wait 5s total for a response @@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)  				pr_err("CPU%d: Stuck ??\n", cpu);  			else  				/* trampoline code not run */ -				pr_err("CPU%d: Not responding.\n", cpu); +				pr_err("CPU%d: Not responding\n", cpu);  			if (apic->inquire_remote_apic)  				apic->inquire_remote_apic(apicid);  		} @@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)  	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||  	    !physid_isset(apicid, phys_cpu_present_map) ||  	    !apic->apic_id_valid(apicid)) { -		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); +		pr_err("%s: bad cpu %d\n", __func__, cpu);  		return -EINVAL;  	} @@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus)  		unsigned int cpu;  		unsigned nr; -		printk(KERN_WARNING -		       "More than 8 CPUs detected - skipping them.\n" -		       "Use CONFIG_X86_BIGSMP.\n"); +		pr_warn("More than 8 CPUs detected - skipping them\n" +			"Use CONFIG_X86_BIGSMP\n");  		nr = 0;  		for_each_present_cpu(cpu) { @@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  #endif  	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { -		printk(KERN_WARNING -			"weird, boot CPU (#%d) not listed by the BIOS.\n", +		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",  			hard_smp_processor_id());  		physid_set(hard_smp_processor_id(), phys_cpu_present_map); @@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 */  	if (!smp_found_config && !acpi_lapic) {  		preempt_enable(); -		printk(KERN_NOTICE "SMP motherboard not detected.\n"); +		pr_notice("SMP 
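
announce_cpu() builds one console line across several calls, which is exactly what pr_cont() is for: it appends to the current record instead of opening a new one with a log level and pr_fmt() prefix. Condensed from the boot-time branch above:

/* Prints, e.g., "Booting Node   0, Processors  #1 #2 #3 OK" as one line. */
pr_info("Booting Node %3d, Processors ", node);
pr_cont(" #%d", cpu);
if (cpu == nr_cpu_ids - 1)
	pr_cont(" OK\n");
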
motherboard not detected\n");  		disable_smp();  		if (APIC_init_uniprocessor()) -			printk(KERN_NOTICE "Local APIC not detected." -					   " Using dummy APIC emulation.\n"); +			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");  		return -1;  	} @@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 * CPU too, but we do it for the sake of robustness anyway.  	 */  	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { -		printk(KERN_NOTICE -			"weird, boot CPU (#%d) not listed by the BIOS.\n", -			boot_cpu_physical_apicid); +		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", +			  boot_cpu_physical_apicid);  		physid_set(hard_smp_processor_id(), phys_cpu_present_map);  	}  	preempt_enable(); @@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  		if (!disable_apic) {  			pr_err("BIOS bug, local APIC #%d not detected!...\n",  				boot_cpu_physical_apicid); -			pr_err("... forcing use of dummy APIC emulation." -				"(tell your hw vendor)\n"); +			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");  		}  		smpboot_clear_io_apic();  		disable_ioapic_support(); @@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 * If SMP should be disabled, then really disable it!  	 */  	if (!max_cpus) { -		printk(KERN_INFO "SMP mode deactivated.\n"); +		pr_info("SMP mode deactivated\n");  		smpboot_clear_io_apic();  		connect_bsp_APIC(); @@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)  	if (smp_sanity_check(max_cpus) < 0) { -		printk(KERN_INFO "SMP disabled\n"); +		pr_info("SMP disabled\n");  		disable_smp();  		goto out;  	} @@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)  	 * Set up local APIC timer on boot CPU.  	 */ -	printk(KERN_INFO "CPU%d: ", 0); +	pr_info("CPU%d: ", 0);  	print_cpu_info(&cpu_data(0));  	x86_init.timers.setup_percpu_clockev(); @@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void)  void __init native_smp_cpus_done(unsigned int max_cpus)  { -	pr_debug("Boot done.\n"); +	pr_debug("Boot done\n");  	nmi_selftest();  	impress_friends(); @@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void)  	/* nr_cpu_ids could be reduced via nr_cpus= */  	if (possible > nr_cpu_ids) { -		printk(KERN_WARNING -			"%d Processors exceeds NR_CPUS limit of %d\n", +		pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",  			possible, nr_cpu_ids);  		possible = nr_cpu_ids;  	} @@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void)  	if (!setup_max_cpus)  #endif  	if (possible > i) { -		printk(KERN_WARNING -			"%d Processors exceeds max_cpus limit of %u\n", +		pr_warn("%d Processors exceeds max_cpus limit of %u\n",  			possible, setup_max_cpus);  		possible = i;  	} -	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", +	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",  		possible, max_t(int, possible - num_processors, 0));  	for (i = 0; i < possible; i++) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05b31d92f69..b481341c936 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -9,6 +9,9 @@  /*   * Handle hardware traps and faults.   
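
prefill_possible_map() clamps the firmware-reported CPU count twice, and each reworked pr_warn() reports one clamp. A worked run with illustrative numbers, condensed from the function:

/* ACPI reports 16 possible cpus, the kernel was built with NR_CPUS=8
 * (so nr_cpu_ids == 8), and maxcpus=4 was passed on a config without
 * CPU hotplug: */
possible = 16;
if (possible > nr_cpu_ids)	/* "16 Processors exceeds NR_CPUS limit of 8" */
	possible = nr_cpu_ids;
if (possible > setup_max_cpus)	/* "8 Processors exceeds max_cpus limit of 4" */
	possible = setup_max_cpus;
/* final report: "smpboot: Allowing 4 CPUs, 0 hotplug CPUs" */
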
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/interrupt.h>  #include <linux/kallsyms.h>  #include <linux/spinlock.h> @@ -143,12 +146,11 @@ trap_signal:  #ifdef CONFIG_X86_64  	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&  	    printk_ratelimit()) { -		printk(KERN_INFO -		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx", -		       tsk->comm, tsk->pid, str, -		       regs->ip, regs->sp, error_code); +		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", +			tsk->comm, tsk->pid, str, +			regs->ip, regs->sp, error_code);  		print_vma_addr(" in ", regs->ip); -		printk("\n"); +		pr_cont("\n");  	}  #endif @@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)  	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&  			printk_ratelimit()) { -		printk(KERN_INFO -			"%s[%d] general protection ip:%lx sp:%lx error:%lx", +		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",  			tsk->comm, task_pid_nr(tsk),  			regs->ip, regs->sp, error_code);  		print_vma_addr(" in ", regs->ip); -		printk("\n"); +		pr_cont("\n");  	}  	force_sig(SIGSEGV, tsk); @@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)  	conditional_sti(regs);  #if 0  	/* No need to warn about this any longer. */ -	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");  #endif  } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e372..cfa5d4f7ca5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/init.h> @@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);  #ifdef CONFIG_X86_TSC  int __init notsc_setup(char *str)  { -	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " -			"cannot disable TSC completely.\n"); +	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");  	tsc_disabled = 1;  	return 1;  } @@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)  			goto success;  		}  	} -	printk("Fast TSC calibration failed\n"); +	pr_err("Fast TSC calibration failed\n");  	return 0;  success: @@ -392,7 +393,7 @@ success:  	 */  	delta *= PIT_TICK_RATE;  	do_div(delta, i*256*1000); -	printk("Fast TSC calibration using PIT\n"); +	pr_info("Fast TSC calibration using PIT\n");  	return delta;  } @@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)  		 * use the reference value, as it is more precise.  		 */  		if (delta >= 90 && delta <= 110) { -			printk(KERN_INFO -			       "TSC: PIT calibration matches %s. %d loops\n", -			       hpet ? "HPET" : "PMTIMER", i + 1); +			pr_info("PIT calibration matches %s. %d loops\n", +				hpet ? 
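
The trailing printk("\n") in these reports becomes pr_cont("\n"): the newline closes the line that pr_info() opened and print_vma_addr() extended, and KERN_CONT/pr_cont() marks that continuation explicitly instead of relying on printk's implicit merging of unterminated lines. The gating around the report is unchanged; condensed:

if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
    printk_ratelimit()) {	/* nonzero only while under the rate limit */
	pr_info("%s[%d] general protection ip:%lx", tsk->comm,
		task_pid_nr(tsk), regs->ip);
	print_vma_addr(" in ", regs->ip);	/* appends " in libfoo.so[...]" */
	pr_cont("\n");				/* close the continued line */
}
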
"HPET" : "PMTIMER", i + 1);  			return tsc_ref_min;  		} @@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void)  	 */  	if (tsc_pit_min == ULONG_MAX) {  		/* PIT gave no useful value */ -		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); +		pr_warn("Unable to calibrate against PIT\n");  		/* We don't have an alternative source, disable TSC */  		if (!hpet && !ref1 && !ref2) { -			printk("TSC: No reference (HPET/PMTIMER) available\n"); +			pr_notice("No reference (HPET/PMTIMER) available\n");  			return 0;  		}  		/* The alternative source failed as well, disable TSC */  		if (tsc_ref_min == ULONG_MAX) { -			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " -			       "failed.\n"); +			pr_warn("HPET/PMTIMER calibration failed\n");  			return 0;  		}  		/* Use the alternative source */ -		printk(KERN_INFO "TSC: using %s reference calibration\n", -		       hpet ? "HPET" : "PMTIMER"); +		pr_info("using %s reference calibration\n", +			hpet ? "HPET" : "PMTIMER");  		return tsc_ref_min;  	}  	/* We don't have an alternative source, use the PIT calibration value */  	if (!hpet && !ref1 && !ref2) { -		printk(KERN_INFO "TSC: Using PIT calibration value\n"); +		pr_info("Using PIT calibration value\n");  		return tsc_pit_min;  	}  	/* The alternative source failed, use the PIT calibration value */  	if (tsc_ref_min == ULONG_MAX) { -		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " -		       "Using PIT calibration\n"); +		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");  		return tsc_pit_min;  	} @@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void)  	 * the PIT value as we know that there are PMTIMERs around  	 * running at double speed. At least we let the user know:  	 */ -	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", -	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -	printk(KERN_INFO "TSC: Using PIT calibration value\n"); +	pr_warn("PIT calibration deviates from %s: %lu %lu\n", +		hpet ? 
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); +	pr_info("Using PIT calibration value\n");  	return tsc_pit_min;  } @@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason)  		tsc_unstable = 1;  		sched_clock_stable = 0;  		disable_sched_clock_irqtime(); -		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); +		pr_info("Marking TSC unstable due to %s\n", reason);  		/* Change only the rating, when not registered */  		if (clocksource_tsc.mult)  			clocksource_mark_unstable(&clocksource_tsc); @@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)  		goto out;  	tsc_khz = freq; -	printk(KERN_INFO "Refined TSC clocksource calibration: " -		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, -					(unsigned long)tsc_khz % 1000); +	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", +		(unsigned long)tsc_khz / 1000, +		(unsigned long)tsc_khz % 1000);  out:  	clocksource_register_khz(&clocksource_tsc, tsc_khz); @@ -970,9 +968,9 @@ void __init tsc_init(void)  		return;  	} -	printk("Detected %lu.%03lu MHz processor.\n", -			(unsigned long)cpu_khz / 1000, -			(unsigned long)cpu_khz % 1000); +	pr_info("Detected %lu.%03lu MHz processor\n", +		(unsigned long)cpu_khz / 1000, +		(unsigned long)cpu_khz % 1000);  	/*  	 * Secondary CPUs do not run through tsc_init(), so set up diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index dc4e910a7d9..36fd42091fa 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,   * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.   * @mm: the probed address space.   * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint   * Return 0 on success or a -ve number on error.   
*/ -int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)  {  	int ret;  	struct insn insn; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e..54abcc0baf2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -28,6 +28,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/capability.h>  #include <linux/errno.h>  #include <linux/interrupt.h> @@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)  	local_irq_enable();  	if (!current->thread.vm86_info) { -		printk("no vm86_info: BAD\n"); +		pr_alert("no vm86_info: BAD\n");  		do_exit(SIGSEGV);  	}  	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);  	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);  	tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);  	if (tmp) { -		printk("vm86: could not access userspace vm86_info\n"); +		pr_alert("could not access userspace vm86_info\n");  		do_exit(SIGSEGV);  	} diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b..992f890283e 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,6 +16,7 @@  #include <linux/pci_ids.h>  #include <linux/pci_regs.h>  #include <linux/smp.h> +#include <linux/irq.h>  #include <asm/apic.h>  #include <asm/pci-direct.h> @@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void)  	ctl = readl(address + 4);  	printk(KERN_INFO "vSMP CTL: capabilities:0x%08x  control:0x%08x\n",  	       cap, ctl); + +	/* If possible, let the vSMP foundation route the interrupt optimally */ +#ifdef CONFIG_SMP +	if (cap & ctl & BIT(8)) { +		ctl &= ~BIT(8); +#ifdef CONFIG_PROC_FS +		/* Don't let users change irq affinity via procfs */ +		no_irq_affinity = 1; +#endif +	} +#endif +  	if (cap & ctl & (1 << 4)) {  		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */  		pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); @@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void)  		pv_irq_ops.save_fl  = PV_CALLEE_SAVE(vsmp_save_fl);  		pv_irq_ops.restore_fl  = PV_CALLEE_SAVE(vsmp_restore_fl);  		pv_init_ops.patch = vsmp_patch; -  		ctl &= ~(1 << 4); -		writel(ctl, address + 4); -		ctl = readl(address + 4); -		printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);  	} +	writel(ctl, address + 4); +	ctl = readl(address + 4); +	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);  	early_iounmap(address, 8);  } @@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void)  #endif  } +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ +	return hard_smp_processor_id() >> index_msb; +} + +/* + * In vSMP, all cpus should be capable of handling interrupts, regardless of + * the APIC used. 
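
set_vsmp_pv_ops() acts on a feature only when the same bit is set in both the capability word (what the vSMP foundation can do) and the control word (bits still set are not yet enabled); clearing a control bit requests the feature. Moving the writel() out of the fastpath branch means the write-back now covers both the new interrupt-routing bit and the existing IRQ-fastpath bit. The shape of the gate:

if (cap & ctl & BIT(8)) {	/* capable, and not yet enabled */
	ctl &= ~BIT(8);		/* ask the foundation to route irqs itself */
	no_irq_affinity = 1;	/* keep procfs writes from fighting it */
}
/* ... once every bit is decided: */
writel(ctl, address + 4);
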
+ */ +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, +					  const struct cpumask *mask) +{ +	cpumask_setall(retmask); +} + +static void vsmp_apic_post_init(void) +{ +	/* need to update phys_pkg_id */ +	apic->phys_pkg_id = apicid_phys_pkg_id; +	apic->vector_allocation_domain = fill_vector_allocation_domain; +} +  void __init vsmp_init(void)  {  	detect_vsmp_box();  	if (!is_vsmp_box())  		return; +	x86_platform.apic_post_init = vsmp_apic_post_init; +  	vsmp_cap_cpus();  	set_vsmp_pv_ops(); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 5db36caf428..8d141b30904 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,6 +18,8 @@   *  use the vDSO.   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/time.h>  #include <linux/init.h>  #include <linux/kernel.h> @@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,  static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,  			      const char *message)  { -	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -	struct task_struct *tsk; - -	if (!show_unhandled_signals || !__ratelimit(&rs)) +	if (!show_unhandled_signals)  		return; -	tsk = current; - -	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", -	       level, tsk->comm, task_pid_nr(tsk), -	       message, regs->ip, regs->cs, -	       regs->sp, regs->ax, regs->si, regs->di); +	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", +			      level, current->comm, task_pid_nr(current), +			      message, regs->ip, regs->cs, +			      regs->sp, regs->ax, regs->si, regs->di);  }  static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f3d07..6020f6f5927 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);  EXPORT_SYMBOL(copy_user_generic_string);  EXPORT_SYMBOL(copy_user_generic_unrolled); +EXPORT_SYMBOL(copy_user_enhanced_fast_string);  EXPORT_SYMBOL(__copy_user_nocache);  EXPORT_SYMBOL(_copy_from_user);  EXPORT_SYMBOL(_copy_to_user); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 35c5e543f55..9f3167e891e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { }  void __init x86_init_pgd_noop(pgd_t *unused) { }  int __init iommu_init_noop(void) { return 0; }  void iommu_shutdown_noop(void) { } -void wallclock_init_noop(void) { }  /*   * The platform setup functions are preset with the default functions @@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; };  struct x86_platform_ops x86_platform = {  	.calibrate_tsc			= native_calibrate_tsc, -	.wallclock_init			= wallclock_init_noop,  	.get_wallclock			= mach_get_cmos_time,  	.set_wallclock			= mach_set_rtc_mmss,  	.iommu_shutdown			= iommu_shutdown_noop, diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bd18149b2b0..3d3e2070911 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -3,6 +3,9 @@   *   * Author: Suresh Siddha <suresh.b.siddha@intel.com>   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/bootmem.h>  #include <linux/compat.h>  #include <asm/i387.h> @@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf)  	BUG_ON(sig_xstate_size < xstate_size);  	
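
warn_bad_vsyscall() swaps its open-coded rate-limit state for pr_notice_ratelimited(), which keeps one static state per call site. Simplified from the printk_ratelimited() machinery it builds on (my_notice_ratelimited is an illustrative stand-in):

#define my_notice_ratelimited(fmt, ...)					\
({									\
	static DEFINE_RATELIMIT_STATE(_rs,				\
				      DEFAULT_RATELIMIT_INTERVAL,	\
				      DEFAULT_RATELIMIT_BURST);		\
	if (__ratelimit(&_rs))		/* false while over the limit */\
		printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__);		\
})
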
if ((unsigned long)buf % 64) -		printk("save_i387_xstate: bad fpstate %p\n", buf); +		pr_err("%s: bad fpstate %p\n", __func__, buf);  	if (!used_math())  		return 0; @@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void)  	pcntxt_mask = eax + ((u64)edx << 32);  	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { -		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", +		pr_err("FP/SSE not shown under xsave features 0x%llx\n",  		       pcntxt_mask);  		BUG();  	} @@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void)  	setup_xstate_init(); -	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " -	       "cntxt size 0x%x\n", -	       pcntxt_mask, xstate_size); +	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", +		pcntxt_mask, xstate_size);  }  /* diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 2e88438ffd8..9b7ec1150ab 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)  static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)  { -	if (idx < X86_PMC_IDX_FIXED) +	if (idx < INTEL_PMC_IDX_FIXED)  		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);  	else -		return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); +		return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);  }  void kvm_deliver_pmi(struct kvm_vcpu *vcpu) @@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx)  	if (pmc_is_gp(pmc))  		reprogram_gp_counter(pmc, pmc->eventsel);  	else { -		int fidx = idx - X86_PMC_IDX_FIXED; +		int fidx = idx - INTEL_PMC_IDX_FIXED;  		reprogram_fixed_counter(pmc,  				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);  	} @@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)  		return;  	pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, -			X86_PMC_MAX_GENERIC); +			INTEL_PMC_MAX_GENERIC);  	pmu->counter_bitmask[KVM_PMC_GP] =  		((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;  	bitmap_len = (entry->eax >> 24) & 0xff; @@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)  		pmu->nr_arch_fixed_counters = 0;  	} else {  		pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), -				X86_PMC_MAX_FIXED); +				INTEL_PMC_MAX_FIXED);  		pmu->counter_bitmask[KVM_PMC_FIXED] =  			((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;  	}  	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | -		(((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); +		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);  	pmu->global_ctrl_mask = ~pmu->global_ctrl;  } @@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)  	struct kvm_pmu *pmu = &vcpu->arch.pmu;  	memset(pmu, 0, sizeof(*pmu)); -	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { +	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {  		pmu->gp_counters[i].type = KVM_PMC_GP;  		pmu->gp_counters[i].vcpu = vcpu;  		pmu->gp_counters[i].idx = i;  	} -	for (i = 0; i < X86_PMC_MAX_FIXED; i++) { +	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {  		pmu->fixed_counters[i].type = KVM_PMC_FIXED;  		pmu->fixed_counters[i].vcpu = vcpu; -		pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; +		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;  	}  	init_irq_work(&pmu->irq_work, trigger_pmi);  	kvm_pmu_cpuid_update(vcpu); @@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)  	int i;  	irq_work_sync(&pmu->irq_work); -	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { +	for (i = 0; i < 
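
The X86_PMC_* to INTEL_PMC_* rename keeps the values (INTEL_PMC_IDX_FIXED remains 32), so the global-control bitmap built in kvm_pmu_cpuid_update() still carries general-purpose counters in the low bits and fixed counters from bit 32 upward. A worked instance, assuming guest CPUID advertises 4 GP and 3 fixed counters:

int nr_gp = 4, nr_fixed = 3;		/* illustrative CPUID-derived counts */
u64 global_ctrl = ((1 << nr_gp) - 1) |
		  (((1ull << nr_fixed) - 1) << 32 /* INTEL_PMC_IDX_FIXED */);
/* global_ctrl == 0x70000000f: bits 0-3 enable the GP counters,
 * bits 32-34 the fixed ones; global_ctrl_mask is its complement. */
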
INTEL_PMC_MAX_GENERIC; i++) {  		struct kvm_pmc *pmc = &pmu->gp_counters[i];  		stop_counter(pmc);  		pmc->counter = pmc->eventsel = 0;  	} -	for (i = 0; i < X86_PMC_MAX_FIXED; i++) +	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)  		stop_counter(&pmu->fixed_counters[i]);  	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 911d2641f14..62d02e3c3ed 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -710,16 +710,6 @@ TRACE_EVENT(kvm_skinit,  		  __entry->rip, __entry->slb)  ); -#define __print_insn(insn, ilen) ({		                 \ -	int i;							 \ -	const char *ret = p->buffer + p->len;			 \ -								 \ -	for (i = 0; i < ilen; ++i)				 \ -		trace_seq_printf(p, " %02x", insn[i]);		 \ -	trace_seq_printf(p, "%c", 0);				 \ -	ret;							 \ -	}) -  #define KVM_EMUL_INSN_F_CR0_PE (1 << 0)  #define KVM_EMUL_INSN_F_EFL_VM (1 << 1)  #define KVM_EMUL_INSN_F_CS_D   (1 << 2) @@ -786,7 +776,7 @@ TRACE_EVENT(kvm_emulate_insn,  	TP_printk("%x:%llx:%s (%s)%s",  		  __entry->csbase, __entry->rip, -		  __print_insn(__entry->insn, __entry->len), +		  __print_hex(__entry->insn, __entry->len),  		  __print_symbolic(__entry->flags,  				   kvm_trace_symbol_emul_flags),  		  __entry->failed ? " failed" : "" diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c index a311cc59b65..8d6ef78b5d0 100644 --- a/arch/x86/lib/msr-reg-export.c +++ b/arch/x86/lib/msr-reg-export.c @@ -1,5 +1,5 @@  #include <linux/module.h>  #include <asm/msr.h> -EXPORT_SYMBOL(native_rdmsr_safe_regs); -EXPORT_SYMBOL(native_wrmsr_safe_regs); +EXPORT_SYMBOL(rdmsr_safe_regs); +EXPORT_SYMBOL(wrmsr_safe_regs); diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa10623f2..f6d13eefad1 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -6,13 +6,13 @@  #ifdef CONFIG_X86_64  /* - * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); + * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]);   *   * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi]   *   */  .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs)  	CFI_STARTPROC  	pushq_cfi %rbx  	pushq_cfi %rbp @@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs)  	_ASM_EXTABLE(1b, 3b)  	CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs)  .endm  #else /* X86_32 */  .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs)  	CFI_STARTPROC  	pushl_cfi %ebx  	pushl_cfi %ebp @@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs)  	_ASM_EXTABLE(1b, 3b)  	CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs)  .endm  #endif diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bc4e9d84157..e0e6990723e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -385,7 +385,7 @@ void free_initmem(void)  }  #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end)  {  	/*  	 * end could be not aligned, and We can not align that, diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 303f0863782..b2b94438ff0 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)  			goto fail;  		}  		/* both registers must be reserved */ -		if (num_counters == AMD64_NUM_COUNTERS_F15H) { +		if (num_counters == AMD64_NUM_COUNTERS_CORE) {  			msrs->counters[i].addr = 
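
With the native_ prefix dropped (this merge also removes the corresponding paravirt rdmsr_regs/wrmsr_regs hooks), {rdmsr,wrmsr}_safe_regs() are called directly, using the register-image convention documented in msr-reg.S. An illustrative caller:

u32 gprs[8] = { 0 };	/* [eax, ecx, edx, ebx, esp, ebp, esi, edi] */

gprs[1] = 0xc0010015;	/* %ecx selects the MSR; AMD HWCR, purely as
			 * an example */
if (rdmsr_safe_regs(gprs) == 0)		/* 0 on success, #GP is trapped */
	pr_info("msr %#x = %#llx\n", gprs[1],
		((u64)gprs[2] << 32) | gprs[0]);	/* edx:eax */
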
MSR_F15H_PERF_CTR + (i << 1);  			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);  		} else { @@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops)  	ops->create_files = setup_ibs_files;  	if (boot_cpu_data.x86 == 0x15) { -		num_counters = AMD64_NUM_COUNTERS_F15H; +		num_counters = AMD64_NUM_COUNTERS_CORE;  	} else {  		num_counters = AMD64_NUM_COUNTERS;  	} diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 59880afa851..71b5d5a07d7 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1,7 +1,7 @@  /*   *	SGI UltraViolet TLB flush routines.   * - *	(c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. + *	(c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.   *   *	This code is released under the GNU General Public License version 2 or   *	later. @@ -38,8 +38,7 @@ static int timeout_base_ns[] = {  static int timeout_us;  static int nobau; -static int baudisabled; -static spinlock_t disable_lock; +static int nobau_perm;  static cycles_t congested_cycles;  /* tunables: */ @@ -47,12 +46,13 @@ static int max_concurr		= MAX_BAU_CONCURRENT;  static int max_concurr_const	= MAX_BAU_CONCURRENT;  static int plugged_delay	= PLUGGED_DELAY;  static int plugsb4reset		= PLUGSB4RESET; +static int giveup_limit		= GIVEUP_LIMIT;  static int timeoutsb4reset	= TIMEOUTSB4RESET;  static int ipi_reset_limit	= IPI_RESET_LIMIT;  static int complete_threshold	= COMPLETE_THRESHOLD;  static int congested_respns_us	= CONGESTED_RESPONSE_US;  static int congested_reps	= CONGESTED_REPS; -static int congested_period	= CONGESTED_PERIOD; +static int disabled_period	= DISABLED_PERIOD;  static struct tunables tunables[] = {  	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ @@ -63,7 +63,8 @@ static struct tunables tunables[] = {  	{&complete_threshold, COMPLETE_THRESHOLD},  	{&congested_respns_us, CONGESTED_RESPONSE_US},  	{&congested_reps, CONGESTED_REPS}, -	{&congested_period, CONGESTED_PERIOD} +	{&disabled_period, DISABLED_PERIOD}, +	{&giveup_limit, GIVEUP_LIMIT}  };  static struct dentry *tunables_dir; @@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);  static DEFINE_PER_CPU(struct bau_control, bau_control);  static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); +static void +set_bau_on(void) +{ +	int cpu; +	struct bau_control *bcp; + +	if (nobau_perm) { +		pr_info("BAU not initialized; cannot be turned on\n"); +		return; +	} +	nobau = 0; +	for_each_present_cpu(cpu) { +		bcp = &per_cpu(bau_control, cpu); +		bcp->nobau = 0; +	} +	pr_info("BAU turned on\n"); +	return; +} + +static void +set_bau_off(void) +{ +	int cpu; +	struct bau_control *bcp; + +	nobau = 1; +	for_each_present_cpu(cpu) { +		bcp = &per_cpu(bau_control, cpu); +		bcp->nobau = 1; +	} +	pr_info("BAU turned off\n"); +	return; +} +  /*   * Determine the first node on a uvhub. 'Nodes' are used for kernel   * memory allocation. @@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,  		 * Both sockets dump their completed count total into  		 * the message's count.  		 */ -		smaster->socket_acknowledge_count[mdp->msg_slot] = 0; +		*sp = 0;  		asp = (struct atomic_short *)&msg->acknowledge_count;  		msg_ack_count = atom_asr(socket_ack_count, asp); @@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,  }  /* - * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. + * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. + * But not currently used.   
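
The new set_bau_on()/set_bau_off() helpers replace the old global baudisabled/spinlock pair with a per-cpu nobau flag, walking present rather than merely online cpus so a cpu brought up later inherits the current setting; nobau_perm covers the case where the BAU never initialized and must stay off. The core pattern:

for_each_present_cpu(cpu) {		/* not just the online set */
	bcp = &per_cpu(bau_control, cpu);
	bcp->nobau = 1;			/* 0 in set_bau_on() */
}
pr_info("BAU turned off\n");
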
*/  static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)  {  	unsigned long descriptor_status; -	unsigned long descriptor_status2; -	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); -	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; -	descriptor_status = (descriptor_status << 1) | descriptor_status2; +	descriptor_status = +		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;  	return descriptor_status;  } @@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp)   */  int handle_uv2_busy(struct bau_control *bcp)  { -	int busy_one = bcp->using_desc; -	int normal = bcp->uvhub_cpu; -	int selected = -1; -	int i; -	unsigned long descriptor_status; -	unsigned long status; -	int mmr_offset; -	struct bau_desc *bau_desc_old; -	struct bau_desc *bau_desc_new; -	struct bau_control *hmaster = bcp->uvhub_master;  	struct ptc_stats *stat = bcp->statp; -	cycles_t ttm;  	stat->s_uv2_wars++; -	spin_lock(&hmaster->uvhub_lock); -	/* try for the original first */ -	if (busy_one != normal) { -		if (!normal_busy(bcp)) -			selected = normal; -	} -	if (selected < 0) { -		/* can't use the normal, select an alternate */ -		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; -		descriptor_status = read_lmmr(mmr_offset); - -		/* scan available descriptors 32-63 */ -		for (i = 0; i < UV_CPUS_PER_AS; i++) { -			if ((hmaster->inuse_map & (1 << i)) == 0) { -				status = ((descriptor_status >> -						(i * UV_ACT_STATUS_SIZE)) & -						UV_ACT_STATUS_MASK) << 1; -				if (status != UV2H_DESC_BUSY) { -					selected = i + UV_CPUS_PER_AS; -					break; -				} -			} -		} -	} - -	if (busy_one != normal) -		/* mark the busy alternate as not in-use */ -		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); - -	if (selected >= 0) { -		/* switch to the selected descriptor */ -		if (selected != normal) { -			/* set the selected alternate as in-use */ -			hmaster->inuse_map |= -					(1 << (selected - UV_CPUS_PER_AS)); -			if (selected > stat->s_uv2_wars_hw) -				stat->s_uv2_wars_hw = selected; -		} -		bau_desc_old = bcp->descriptor_base; -		bau_desc_old += (ITEMS_PER_DESC * busy_one); -		bcp->using_desc = selected; -		bau_desc_new = bcp->descriptor_base; -		bau_desc_new += (ITEMS_PER_DESC * selected); -		*bau_desc_new = *bau_desc_old; -	} else { -		/* -		 * All are busy. Wait for the normal one for this cpu to -		 * free up. 
-		 */ -		stat->s_uv2_war_waits++; -		spin_unlock(&hmaster->uvhub_lock); -		ttm = get_cycles(); -		do { -			cpu_relax(); -		} while (normal_busy(bcp)); -		spin_lock(&hmaster->uvhub_lock); -		/* switch to the original descriptor */ -		bcp->using_desc = normal; -		bau_desc_old = bcp->descriptor_base; -		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc); -		bcp->using_desc = (ITEMS_PER_DESC * normal); -		bau_desc_new = bcp->descriptor_base; -		bau_desc_new += (ITEMS_PER_DESC * normal); -		*bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ -	} -	spin_unlock(&hmaster->uvhub_lock); -	return FLUSH_RETRY_BUSYBUG; +	bcp->busy = 1; +	return FLUSH_GIVEUP;  }  static int uv2_wait_completion(struct bau_desc *bau_desc, @@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,  {  	unsigned long descriptor_stat;  	cycles_t ttm; -	int desc = bcp->using_desc; +	int desc = bcp->uvhub_cpu;  	long busy_reps = 0;  	struct ptc_stats *stat = bcp->statp; @@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,  	/* spin on the status MMR, waiting for it to go idle */  	while (descriptor_stat != UV2H_DESC_IDLE) { -		/* -		 * Our software ack messages may be blocked because -		 * there are no swack resources available.  As long -		 * as none of them has timed out hardware will NACK -		 * our message and its state will stay IDLE. -		 */ -		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || -		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { +		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { +			/* +			 * A h/w bug on the destination side may +			 * have prevented the message being marked +			 * pending, thus it doesn't get replied to +			 * and gets continually nacked until it times +			 * out with a SOURCE_TIMEOUT. +			 */  			stat->s_stimeout++;  			return FLUSH_GIVEUP; -		} else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { -			stat->s_strongnacks++; -			bcp->conseccompletes = 0; -			return FLUSH_GIVEUP;  		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { +			ttm = get_cycles(); + +			/* +			 * Our retries may be blocked by all destination +			 * swack resources being consumed, and a timeout +			 * pending.  In that case hardware returns the +			 * ERROR that looks like a destination timeout. +			 * Without using the extended status we have to +			 * deduce from the short time that this was a +			 * strong nack. 
+			 */ +			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { +				bcp->conseccompletes = 0; +				stat->s_plugged++; +				/* FLUSH_RETRY_PLUGGED causes hang on boot */ +				return FLUSH_GIVEUP; +			}  			stat->s_dtimeout++;  			bcp->conseccompletes = 0; -			return FLUSH_RETRY_TIMEOUT; +			/* FLUSH_RETRY_TIMEOUT causes hang on boot */ +			return FLUSH_GIVEUP;  		} else {  			busy_reps++;  			if (busy_reps > 1000000) { @@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,  				busy_reps = 0;  				ttm = get_cycles();  				if ((ttm - bcp->send_message) > -					(bcp->clocks_per_100_usec)) { +						bcp->timeout_interval)  					return handle_uv2_busy(bcp); -				}  			}  			/*  			 * descriptor_stat is still BUSY @@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,  {  	int right_shift;  	unsigned long mmr_offset; -	int desc = bcp->using_desc; +	int desc = bcp->uvhub_cpu;  	if (desc < UV_CPUS_PER_AS) {  		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; @@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,  }  /* - * Completions are taking a very long time due to a congested numalink - * network. + * Stop all cpus on a uvhub from using the BAU for a period of time. + * This is reversed by check_enable.   */ -static void disable_for_congestion(struct bau_control *bcp, -					struct ptc_stats *stat) +static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)  { -	/* let only one cpu do this disabling */ -	spin_lock(&disable_lock); +	int tcpu; +	struct bau_control *tbcp; +	struct bau_control *hmaster; +	cycles_t tm1; -	if (!baudisabled && bcp->period_requests && -	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) { -		int tcpu; -		struct bau_control *tbcp; -		/* it becomes this cpu's job to turn on the use of the -		   BAU again */ -		baudisabled = 1; -		bcp->set_bau_off = 1; -		bcp->set_bau_on_time = get_cycles(); -		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); +	hmaster = bcp->uvhub_master; +	spin_lock(&hmaster->disable_lock); +	if (!bcp->baudisabled) {  		stat->s_bau_disabled++; +		tm1 = get_cycles();  		for_each_present_cpu(tcpu) {  			tbcp = &per_cpu(bau_control, tcpu); -			tbcp->baudisabled = 1; +			if (tbcp->uvhub_master == hmaster) { +				tbcp->baudisabled = 1; +				tbcp->set_bau_on_time = +					tm1 + bcp->disabled_period; +			}  		}  	} - -	spin_unlock(&disable_lock); +	spin_unlock(&hmaster->disable_lock);  }  static void count_max_concurr(int stat, struct bau_control *bcp, @@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,  			bcp->period_requests++;  			bcp->period_time += elapsed;  			if ((elapsed > congested_cycles) && -			    (bcp->period_requests > bcp->cong_reps)) -				disable_for_congestion(bcp, stat); +			    (bcp->period_requests > bcp->cong_reps) && +			    ((bcp->period_time / bcp->period_requests) > +							congested_cycles)) { +				stat->s_congested++; +				disable_for_period(bcp, stat); +			}  		}  	} else  		stat->s_requestor--;  	if (completion_status == FLUSH_COMPLETE && try > 1)  		stat->s_retriesok++; -	else if (completion_status == FLUSH_GIVEUP) +	else if (completion_status == FLUSH_GIVEUP) {  		stat->s_giveup++; +		if (get_cycles() > bcp->period_end) +			bcp->period_giveups = 0; +		bcp->period_giveups++; +		if (bcp->period_giveups == 1) +			bcp->period_end = get_cycles() + bcp->disabled_period; +		if (bcp->period_giveups > bcp->giveup_limit) { +			disable_for_period(bcp, stat); +			stat->s_giveuplimit++; +		} +	} 
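
record_send_stats() now enforces a give-up budget: the first FLUSH_GIVEUP opens a window of disabled_period cycles, and exceeding giveup_limit give-ups inside that window parks the whole uvhub via disable_for_period(). The window logic, annotated:

if (get_cycles() > bcp->period_end)
	bcp->period_giveups = 0;	/* previous window expired */
bcp->period_giveups++;
if (bcp->period_giveups == 1)		/* first giveup opens a window */
	bcp->period_end = get_cycles() + bcp->disabled_period;
if (bcp->period_giveups > bcp->giveup_limit)
	disable_for_period(bcp, stat);	/* reversed later by check_enable() */
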
 }  /* @@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,   * Returns 1 if it gives up entirely and the original cpu mask is to be   * returned to the kernel.   */ -int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) +int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, +	struct bau_desc *bau_desc)  {  	int seq_number = 0;  	int completion_stat = 0; @@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)  	struct bau_control *hmaster = bcp->uvhub_master;  	struct uv1_bau_msg_header *uv1_hdr = NULL;  	struct uv2_bau_msg_header *uv2_hdr = NULL; -	struct bau_desc *bau_desc; -	if (bcp->uvhub_version == 1) +	if (bcp->uvhub_version == 1) { +		uv1 = 1;  		uv1_throttle(hmaster, stat); +	}  	while (hmaster->uvhub_quiesce)  		cpu_relax();  	time1 = get_cycles(); +	if (uv1) +		uv1_hdr = &bau_desc->header.uv1_hdr; +	else +		uv2_hdr = &bau_desc->header.uv2_hdr; +  	do { -		bau_desc = bcp->descriptor_base; -		bau_desc += (ITEMS_PER_DESC * bcp->using_desc); -		if (bcp->uvhub_version == 1) { -			uv1 = 1; -			uv1_hdr = &bau_desc->header.uv1_hdr; -		} else -			uv2_hdr = &bau_desc->header.uv2_hdr; -		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { +		if (try == 0) {  			if (uv1)  				uv1_hdr->msg_type = MSG_REGULAR;  			else @@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)  			uv1_hdr->sequence = seq_number;  		else  			uv2_hdr->sequence = seq_number; -		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; +		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;  		bcp->send_message = get_cycles();  		write_mmr_activation(index);  		try++;  		completion_stat = wait_completion(bau_desc, bcp, try); -		/* UV2: wait_completion() may change the bcp->using_desc */  		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);  		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {  			bcp->ipi_attempts = 0; +			stat->s_overipilimit++;  			completion_stat = FLUSH_GIVEUP;  			break;  		}  		cpu_relax();  	} while ((completion_stat == FLUSH_RETRY_PLUGGED) || -		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||  		 (completion_stat == FLUSH_RETRY_TIMEOUT));  	time2 = get_cycles(); @@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)  }  /* - * The BAU is disabled. When the disabled time period has expired, the cpu - * that disabled it must re-enable it. - * Return 0 if it is re-enabled for all cpus. + * The BAU is disabled for this uvhub. When the disabled time period has + * expired re-enable it. + * Return 0 if it is re-enabled for all cpus on this uvhub.   
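
With the alternate-descriptor juggling gone, a cpu always sends from the descriptor slot matching its uvhub_cpu number, so uv_flush_send_and_wait() now receives the descriptor from its caller instead of recomputing the old bcp->using_desc on every retry. The caller-side pairing appears in the uv_flush_tlb_others() hunk below:

bau_desc = bcp->descriptor_base;
bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;	/* fixed per-cpu slot */
/* ... distribution bits filled in ... */
if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
	return NULL;		/* every target cpu was messaged */
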
*/  static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)  {  	int tcpu;  	struct bau_control *tbcp; +	struct bau_control *hmaster; -	if (bcp->set_bau_off) { -		if (get_cycles() >= bcp->set_bau_on_time) { -			stat->s_bau_reenabled++; -			baudisabled = 0; -			for_each_present_cpu(tcpu) { -				tbcp = &per_cpu(bau_control, tcpu); +	hmaster = bcp->uvhub_master; +	spin_lock(&hmaster->disable_lock); +	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { +		stat->s_bau_reenabled++; +		for_each_present_cpu(tcpu) { +			tbcp = &per_cpu(bau_control, tcpu); +			if (tbcp->uvhub_master == hmaster) {  				tbcp->baudisabled = 0;  				tbcp->period_requests = 0;  				tbcp->period_time = 0; +				tbcp->period_giveups = 0;  			} -			return 0;  		} +		spin_unlock(&hmaster->disable_lock); +		return 0;  	} +	spin_unlock(&hmaster->disable_lock);  	return -1;  } @@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  	struct cpumask *flush_mask;  	struct ptc_stats *stat;  	struct bau_control *bcp; - -	/* kernel was booted 'nobau' */ -	if (nobau) -		return cpumask; +	unsigned long descriptor_status; +	unsigned long status;  	bcp = &per_cpu(bau_control, cpu);  	stat = bcp->statp; +	stat->s_enters++; + +	if (bcp->nobau) +		return cpumask; + +	if (bcp->busy) { +		descriptor_status = +			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); +		status = ((descriptor_status >> (bcp->uvhub_cpu * +			UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; +		if (status == UV2H_DESC_BUSY) +			return cpumask; +		bcp->busy = 0; +	}  	/* bau was disabled due to slow response */  	if (bcp->baudisabled) { -		if (check_enable(bcp, stat)) +		if (check_enable(bcp, stat)) { +			stat->s_ipifordisabled++;  			return cpumask; +		}  	}  	/* @@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  		stat->s_ntargself++;  	bau_desc = bcp->descriptor_base; -	bau_desc += (ITEMS_PER_DESC * bcp->using_desc); +	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);  	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);  	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))  		return NULL; @@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,  	 * or 1 if it gave up and the original cpumask should be returned.  	 */ -	if (!uv_flush_send_and_wait(flush_mask, bcp)) +	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))  		return NULL;  	else  		return cpumask;  }  /* - * Search the message queue for any 'other' message with the same software - * acknowledge resource bit vector. + * Search the message queue for any 'other' unprocessed message with the + * same software acknowledge resource bit vector as the 'msg' message.   
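
uv_flush_tlb_others() also gains a bail-out while the hardware slot is still busy from an earlier give-up: it re-reads the activation status MMR and keeps falling back to IPIs until the slot drains. The extraction mirrors uv2_read_status(), shifted left one bit so it still compares against the three-bit UV2H_DESC_* encodings:

status = ((read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0)
	   >> (bcp->uvhub_cpu * UV_ACT_STATUS_SIZE))
	  & UV_ACT_STATUS_MASK) << 1;
if (status == UV2H_DESC_BUSY)
	return cpumask;		/* caller reverts to IPIs this time */
bcp->busy = 0;			/* slot drained; use the BAU again */
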
*/  struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, -			struct bau_control *bcp, unsigned char swack_vec) +					   struct bau_control *bcp)  {  	struct bau_pq_entry *msg_next = msg + 1; +	unsigned char swack_vec = msg->swack_vec;  	if (msg_next > bcp->queue_last)  		msg_next = bcp->queue_first; -	while ((msg_next->swack_vec != 0) && (msg_next != msg)) { -		if (msg_next->swack_vec == swack_vec) +	while (msg_next != msg) { +		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && +				(msg_next->swack_vec == swack_vec))  			return msg_next;  		msg_next++;  		if (msg_next > bcp->queue_last) @@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)  		 * This message was assigned a swack resource, but no  		 * reserved acknowlegment is pending.  		 * The bug has prevented this message from setting the MMR. -		 * And no other message has used the same sw_ack resource. -		 * Do the requested shootdown but do not reply to the msg. -		 * (the 0 means make no acknowledge)  		 */ -		bau_process_message(mdp, bcp, 0); -		return; -	} - -	/* -	 * Some message has set the MMR 'pending' bit; it might have been -	 * another message.  Look for that message. -	 */ -	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); -	if (other_msg) { -		/* There is another.  Do not ack the current one. */ -		bau_process_message(mdp, bcp, 0);  		/* -		 * Let the natural processing of that message acknowledge -		 * it. Don't get the processing of sw_ack's out of order. +		 * Some message has set the MMR 'pending' bit; it might have +		 * been another message.  Look for that message.  		 */ -		return; +		other_msg = find_another_by_swack(msg, bcp); +		if (other_msg) { +			/* +			 * There is another. Process this one but do not +			 * ack it. +			 */ +			bau_process_message(mdp, bcp, 0); +			/* +			 * Let the natural processing of that other message +			 * acknowledge it. Don't get the processing of sw_ack's +			 * out of order. +			 */ +			return; +		}  	}  	/* -	 * There is no other message using this sw_ack, so it is safe to -	 * acknowledge it. +	 * Either the MMR shows this one pending a reply or there is no +	 * other message using this sw_ack, so it is safe to acknowledge it.  	 
*/  	bau_process_message(mdp, bcp, 1); @@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void)  		 */  		mmr_image |= (1L << SOFTACK_MSHIFT);  		if (is_uv2_hub()) { -			mmr_image |= (1L << UV2_EXT_SHFT); +			/* hw bug workaround; do not use extended status */ +			mmr_image &= ~(1L << UV2_EXT_SHFT);  		}  		write_mmr_misc_control(pnode, mmr_image);  	} @@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec)  static int ptc_seq_show(struct seq_file *file, void *data)  {  	struct ptc_stats *stat; +	struct bau_control *bcp;  	int cpu;  	cpu = *(loff_t *)data;  	if (!cpu) {  		seq_printf(file, -			"# cpu sent stime self locals remotes ncpus localhub "); +		 "# cpu bauoff sent stime self locals remotes ncpus localhub ");  		seq_printf(file,  			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");  		seq_printf(file, -		    "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); +			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); +		seq_printf(file, +			"rok resetp resett giveup sto bz throt disable ");  		seq_printf(file, -			"resetp resett giveup sto bz throt swack recv rtime "); +			"enable wars warshw warwaits enters ipidis plugged ");  		seq_printf(file, -			"all one mult none retry canc nocan reset rcan "); +			"ipiover glim cong swack recv rtime all one mult ");  		seq_printf(file, -			"disable enable wars warshw warwaits\n"); +			"none retry canc nocan reset rcan\n");  	}  	if (cpu < num_possible_cpus() && cpu_online(cpu)) { -		stat = &per_cpu(ptcstats, cpu); +		bcp = &per_cpu(bau_control, cpu); +		stat = bcp->statp;  		/* source side statistics */  		seq_printf(file, -			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", -			   cpu, stat->s_requestor, cycles_2_us(stat->s_time), +			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", +			   cpu, bcp->nobau, stat->s_requestor, +			   cycles_2_us(stat->s_time),  			   stat->s_ntargself, stat->s_ntarglocals,  			   stat->s_ntargremotes, stat->s_ntargcpu,  			   stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, @@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data)  			   stat->s_resets_plug, stat->s_resets_timeout,  			   stat->s_giveup, stat->s_stimeout,  			   stat->s_busy, stat->s_throttles); +		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", +			   stat->s_bau_disabled, stat->s_bau_reenabled, +			   stat->s_uv2_wars, stat->s_uv2_wars_hw, +			   stat->s_uv2_war_waits, stat->s_enters, +			   stat->s_ipifordisabled, stat->s_plugged, +			   stat->s_overipilimit, stat->s_giveuplimit, +			   stat->s_congested);  		/* destination side statistics */  		seq_printf(file, -			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", +			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",  			   read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),  			   stat->d_requestee, cycles_2_us(stat->d_time),  			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,  			   stat->d_nomsg, stat->d_retries, stat->d_canceled,  			   stat->d_nocanceled, stat->d_resets,  			   stat->d_rcanceled); -		seq_printf(file, "%ld %ld %ld %ld %ld\n", -			stat->s_bau_disabled, stat->s_bau_reenabled, -			stat->s_uv2_wars, stat->s_uv2_wars_hw, -			stat->s_uv2_war_waits);  	}  	return 0;  } @@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,  	char *buf;  	int ret; -	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", -		"max_concur plugged_delay plugsb4reset", -		"timeoutsb4reset ipi_reset_limit complete_threshold", -		
"congested_response_us congested_reps congested_period", +	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", +		"max_concur plugged_delay plugsb4reset timeoutsb4reset", +		"ipi_reset_limit complete_threshold congested_response_us", +		"congested_reps disabled_period giveup_limit",  		max_concurr, plugged_delay, plugsb4reset,  		timeoutsb4reset, ipi_reset_limit, complete_threshold, -		congested_respns_us, congested_reps, congested_period); +		congested_respns_us, congested_reps, disabled_period, +		giveup_limit);  	if (!buf)  		return -ENOMEM; @@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,  		return -EFAULT;  	optstr[count - 1] = '\0'; +	if (!strcmp(optstr, "on")) { +		set_bau_on(); +		return count; +	} else if (!strcmp(optstr, "off")) { +		set_bau_off(); +		return count; +	} +  	if (strict_strtol(optstr, 10, &input_arg) < 0) {  		printk(KERN_DEBUG "%s is invalid\n", optstr);  		return -EINVAL; @@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,  		bcp->complete_threshold =	complete_threshold;  		bcp->cong_response_us =		congested_respns_us;  		bcp->cong_reps =		congested_reps; -		bcp->cong_period =		congested_period; +		bcp->disabled_period =		sec_2_cycles(disabled_period); +		bcp->giveup_limit =		giveup_limit;  	}  	return count;  } @@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)  			 *   fairness chaining multilevel count replied_to  			 */  		} else { +			/* +			 * BIOS uses legacy mode, but UV2 hardware always +			 * uses native mode for selective broadcasts. +			 */  			uv2_hdr = &bd2->header.uv2_hdr;  			uv2_hdr->swack_flag =	1;  			uv2_hdr->base_dest_nasid = @@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void)  		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;  		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);  		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; -		base = timeout_base_ns[index]; -		ts_ns = base * mult1 * mult2; +		ts_ns = timeout_base_ns[index]; +		ts_ns *= (mult1 * mult2);  		ret = ts_ns / 1000;  	} else {  		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */ @@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void)  	for_each_present_cpu(cpu) {  		bcp = &per_cpu(bau_control, cpu);  		bcp->baudisabled		= 0; +		if (nobau) +			bcp->nobau		= 1;  		bcp->statp			= &per_cpu(ptcstats, cpu);  		/* time interval to catch a hardware stay-busy bug */  		bcp->timeout_interval		= usec_2_cycles(2*timeout_us); @@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)  		bcp->complete_threshold		= complete_threshold;  		bcp->cong_response_us		= congested_respns_us;  		bcp->cong_reps			= congested_reps; -		bcp->cong_period		= congested_period; -		bcp->clocks_per_100_usec =	usec_2_cycles(100); +		bcp->disabled_period =		sec_2_cycles(disabled_period); +		bcp->giveup_limit =		giveup_limit;  		spin_lock_init(&bcp->queue_lock);  		spin_lock_init(&bcp->uvhub_lock); +		spin_lock_init(&bcp->disable_lock);  	}  } @@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,  		}  		bcp->uvhub_master = *hmasterp;  		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; -		bcp->using_desc = bcp->uvhub_cpu;  		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {  			printk(KERN_EMERG "%d cpus per uvhub invalid\n",  				bcp->uvhub_cpu); @@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void)  	if (!is_uv_system())  		return 
0; -	if (nobau) -		return 0; -  	for_each_possible_cpu(cur_cpu) {  		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);  		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));  	}  	nuvhubs = uv_num_possible_blades(); -	spin_lock_init(&disable_lock);  	congested_cycles = usec_2_cycles(congested_respns_us);  	uv_base_pnode = 0x7fffffff; @@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void)  	enable_timeouts();  	if (init_per_cpu(nuvhubs, uv_base_pnode)) { -		nobau = 1; +		set_bau_off(); +		nobau_perm = 1;  		return 0;  	} diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index f25c2765a5c..acf7752da95 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,  	unsigned long mmr_value;  	struct uv_IO_APIC_route_entry *entry;  	int mmr_pnode, err; +	unsigned int dest;  	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=  			sizeof(unsigned long)); @@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,  	if (err != 0)  		return err; +	err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); +	if (err != 0) +		return err; +  	if (limit == UV_AFFINITY_CPU)  		irq_set_status_flags(irq, IRQ_NO_BALANCING);  	else @@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,  	entry->polarity		= 0;  	entry->trigger		= 0;  	entry->mask		= 0; -	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu); +	entry->dest		= dest;  	mmr_pnode = uv_blade_to_pnode(mmr_blade);  	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -222,7 +227,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,  	if (cfg->move_in_progress)  		send_cleanup_vector(cfg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  }  /* diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 5b84a2d3088..b2d534cab25 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -22,7 +22,7 @@ wakeup-objs	+= video-bios.o  realmode-y			+= header.o  realmode-y			+= trampoline_$(BITS).o  realmode-y			+= stack.o -realmode-$(CONFIG_X86_32)	+= reboot_32.o +realmode-y			+= reboot.o  realmode-$(CONFIG_ACPI_SLEEP)	+= $(wakeup-objs)  targets	+= $(realmode-y) diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index fadf48378ad..a28221d94e6 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -6,6 +6,7 @@  #include <linux/linkage.h>  #include <asm/page_types.h> +#include <asm/segment.h>  #include "realmode.h" @@ -28,8 +29,9 @@ GLOBAL(real_mode_header)  	.long	pa_wakeup_header  #endif  	/* APM/BIOS reboot */ -#ifdef CONFIG_X86_32  	.long	pa_machine_real_restart_asm +#ifdef CONFIG_X86_64 +	.long	__KERNEL32_CS  #endif  END(real_mode_header) diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot.S index 114044876b3..f932ea61d1c 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot.S @@ -2,6 +2,8 @@  #include <linux/init.h>  #include <asm/segment.h>  #include <asm/page_types.h> +#include <asm/processor-flags.h> +#include <asm/msr-index.h>  #include "realmode.h"  /* @@ -12,13 +14,35 @@   * doesn't work with at least one type of 486 motherboard.  It is easy   * to stop this code working; hence the copious comments.   * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. 
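
arch_enable_uv_irq() moves from apic->cpu_mask_to_apicid(), which returned an APIC ID with no way to signal failure, to cpu_mask_to_apicid_and(), which returns an error code and passes the destination back through an out parameter, so an empty or invalid mask is rejected before the routing MMR is written:

unsigned int dest;
int err;

err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
if (err)
	return err;		/* no usable target cpu in the mask */
entry->dest = dest;		/* only reached with a valid APIC ID */

Relatedly, uv_set_irq_affinity() now returns IRQ_SET_MASK_OK_NOCOPY, telling the generic irq layer the affinity mask has already been stored and need not be copied again.
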
+ * This code is called with the restart type (0 = BIOS, 1 = APM) in + * the primary argument register (%eax for 32 bit, %edi for 64 bit).   */  	.section ".text32", "ax"  	.code32 - -	.balign	16  ENTRY(machine_real_restart_asm) + +#ifdef CONFIG_X86_64 +	/* Switch to trampoline GDT as it is guaranteed < 4 GiB */ +	movl	$__KERNEL_DS, %eax +	movl	%eax, %ds +	lgdtl	pa_tr_gdt + +	/* Disable paging to drop us out of long mode */ +	movl	%cr0, %eax +	andl	$~X86_CR0_PG, %eax +	movl	%eax, %cr0 +	ljmpl	$__KERNEL32_CS, $pa_machine_real_restart_paging_off + +GLOBAL(machine_real_restart_paging_off) +	xorl	%eax, %eax +	xorl	%edx, %edx +	movl	$MSR_EFER, %ecx +	wrmsr + +	movl	%edi, %eax +	 +#endif /* CONFIG_X86_64 */ +	  	/* Set up the IDT for real mode. */  	lidtl	pa_machine_real_restart_idt diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 66e6d935982..0faad646f5f 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -205,9 +205,9 @@ void syscall32_cpu_init(void)  {  	/* Load these always in case some future AMD CPU supports  	   SYSENTER from compat mode too. */ -	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); -	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); -	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); +	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); +	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); +	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);  	wrmsrl(MSR_CSTAR, ia32_cstar_target);  } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ff962d4b821..ed7d54985d0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1124,9 +1124,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {  	.wbinvd = native_wbinvd,  	.read_msr = native_read_msr_safe, -	.rdmsr_regs = native_rdmsr_safe_regs,  	.write_msr = xen_write_msr_safe, -	.wrmsr_regs = native_wrmsr_safe_regs,  	.read_tsc = native_read_tsc,  	.read_pmc = native_read_pmc, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index afb250d22a6..f58dca7a6e5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)  	notify_cpu_starting(cpu); -	ipi_call_lock();  	set_cpu_online(cpu, true); -	ipi_call_unlock();  	this_cpu_write(cpu_state, CPU_ONLINE); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index dcb8a6e4869..4b01ab3d2c2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -24,6 +24,7 @@  #include <linux/wait.h>  #include <linux/async.h>  #include <linux/pm_runtime.h> +#include <scsi/scsi_scan.h>  #include "base.h"  #include "power/power.h" @@ -332,6 +333,7 @@ void wait_for_device_probe(void)  	/* wait for the known devices to complete their probing */  	wait_event(probe_waitqueue, atomic_read(&probe_count) == 0);  	async_synchronize_full(); +	scsi_complete_async_scans();  }  EXPORT_SYMBOL_GPL(wait_for_device_probe); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 65665c9c42c..8f428a8ab00 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -499,7 +499,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,  			 / sizeof (*ondisk))  		return -EINVAL;  	header->snapc = kmalloc(sizeof(struct ceph_snap_context) + -				snap_count * sizeof (*ondisk), +				snap_count * sizeof(u64),  				gfp_flags);  	if (!header->snapc)  		return -ENOMEM; @@ -977,7 +977,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)  	op = (void *)(replyhead + 1);  	rc = le32_to_cpu(replyhead->result);  	bytes 
= le64_to_cpu(op->extent.length); -	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); +	read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);  	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c index 8f05652d53e..0fcec2aae19 100644 --- a/drivers/clk/spear/spear1310_clock.c +++ b/drivers/clk/spear/spear1310_clock.c @@ -345,31 +345,30 @@ static struct frac_rate_tbl gen_rtbl[] = {  /* clock parents */  static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", };  static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; -static const char *uart0_parents[] = { "pll5_clk", "uart_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", +static const char *uart0_parents[] = { "pll5_clk", "uart_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk",  	"osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", -	"gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", };  static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", };  static const char *i2s_src_parents[] = { "vco1div2_clk", "none", "pll3_clk",  	"i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", };  static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk",  	"pll3_clk", };  static const char *gen_synth2_3_parents[] = { "vco1div4_clk", "vco3div2_clk",  	"pll2_clk", };  static const char *rmii_phy_parents[] = { "ras_tx50_clk", "none", -	"ras_pll2_clk", "ras_synth0_clk", }; +	"ras_pll2_clk", "ras_syn0_clk", };  static const char *smii_rgmii_phy_parents[] = { "none", "ras_tx125_clk", -	"ras_pll2_clk", "ras_synth0_clk", }; -static const char *uart_parents[] = { "ras_apb_clk", "gen_synth3_clk", }; -static const char *i2c_parents[] = { "ras_apb_clk", "gen_synth1_clk", }; -static const char *ssp1_parents[] = { "ras_apb_clk", "gen_synth1_clk", +	"ras_pll2_clk", "ras_syn0_clk", }; +static const char *uart_parents[] = { "ras_apb_clk", "gen_syn3_clk", }; +static const char *i2c_parents[] = { "ras_apb_clk", "gen_syn1_clk", }; +static const char *ssp1_parents[] = { "ras_apb_clk", "gen_syn1_clk",  	"ras_plclk0_clk", }; -static const char *pci_parents[] = { "ras_pll3_clk", "gen_synth2_clk", }; -static const char *tdm_parents[] = { "ras_pll3_clk", "gen_synth1_clk", }; +static const char *pci_parents[] = { "ras_pll3_clk", "gen_syn2_clk", }; +static const char *tdm_parents[] = { "ras_pll3_clk", "gen_syn1_clk", };  void __init spear1310_clk_init(void)  { @@ -390,9 +389,9 @@ void __init spear1310_clk_init(void)  			25000000);  	clk_register_clkdev(clk, "osc_25m_clk", NULL); -	clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, -			CLK_IS_ROOT, 125000000); -	clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); +	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, +			125000000); +	clk_register_clkdev(clk, "gmii_pad_clk", NULL);  	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,  			
CLK_IS_ROOT, 12288000); @@ -406,34 +405,34 @@ void __init spear1310_clk_init(void)  	/* clock derived from 24 or 25 MHz osc clk */  	/* vco-pll */ -	clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,  			SPEAR1310_PLL1_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "vco1_mux_clk", NULL); -	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", +	clk_register_clkdev(clk, "vco1_mclk", NULL); +	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk",  			0, SPEAR1310_PLL1_CTR, SPEAR1310_PLL1_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco1_clk", NULL);  	clk_register_clkdev(clk1, "pll1_clk", NULL); -	clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,  			SPEAR1310_PLL2_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "vco2_mux_clk", NULL); -	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", +	clk_register_clkdev(clk, "vco2_mclk", NULL); +	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk",  			0, SPEAR1310_PLL2_CTR, SPEAR1310_PLL2_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco2_clk", NULL);  	clk_register_clkdev(clk1, "pll2_clk", NULL); -	clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,  			SPEAR1310_PLL3_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "vco3_mux_clk", NULL); -	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", +	clk_register_clkdev(clk, "vco3_mclk", NULL); +	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk",  			0, SPEAR1310_PLL3_CTR, SPEAR1310_PLL3_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco3_clk", NULL); @@ -473,7 +472,7 @@ void __init spear1310_clk_init(void)  	/* peripherals */  	clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1,  			128); -	clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, +	clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0,  			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_THSENS_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -500,177 +499,176 @@ void __init spear1310_clk_init(void)  	clk_register_clkdev(clk, "apb_clk", NULL);  	/* gpt clocks */ -	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_GPT0_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt0_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, +	clk_register_clkdev(clk, "gpt0_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT0_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt0"); -	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_GPT1_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt1_mux_clk", NULL); -	
clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, +	clk_register_clkdev(clk, "gpt1_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt1"); -	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_GPT2_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt2_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, +	clk_register_clkdev(clk, "gpt2_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,  			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt2"); -	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_GPT3_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt3_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, +	clk_register_clkdev(clk, "gpt3_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,  			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt3");  	/* others */ -	clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", -			"vco1div2_clk", 0, SPEAR1310_UART_CLK_SYNT, NULL, -			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "uart_synth_clk", NULL); -	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); +	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "vco1div2_clk", +			0, SPEAR1310_UART_CLK_SYNT, NULL, aux_rtbl, +			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "uart_syn_clk", NULL); +	clk_register_clkdev(clk1, "uart_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, +	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,  			ARRAY_SIZE(uart0_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_UART_CLK_SHIFT, SPEAR1310_UART_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "uart0_mux_clk", NULL); +	clk_register_clkdev(clk, "uart0_mclk", NULL); -	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_UART_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "e0000000.serial"); -	clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", +	clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk",  			"vco1div2_clk", 0, SPEAR1310_SDHCI_CLK_SYNT, NULL,  			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "sdhci_synth_clk", NULL); -	clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "sdhci_syn_clk", NULL); +	clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_SDHCI_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "b3000000.sdhci"); -	clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", -			"vco1div2_clk", 0, SPEAR1310_CFXD_CLK_SYNT, NULL, -			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "cfxd_synth_clk", NULL); -	
clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); +	clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", +			0, SPEAR1310_CFXD_CLK_SYNT, NULL, aux_rtbl, +			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "cfxd_syn_clk", NULL); +	clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CFXD_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "b2800000.cf");  	clk_register_clkdev(clk, NULL, "arasan_xd"); -	clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", -			"vco1div2_clk", 0, SPEAR1310_C3_CLK_SYNT, NULL, -			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "c3_synth_clk", NULL); -	clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); +	clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", +			0, SPEAR1310_C3_CLK_SYNT, NULL, aux_rtbl, +			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "c3_syn_clk", NULL); +	clk_register_clkdev(clk1, "c3_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, +	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,  			ARRAY_SIZE(c3_parents), 0, SPEAR1310_PERIP_CLK_CFG,  			SPEAR1310_C3_CLK_SHIFT, SPEAR1310_C3_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "c3_mux_clk", NULL); +	clk_register_clkdev(clk, "c3_mclk", NULL); -	clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, +	clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_C3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "c3");  	/* gmac */ -	clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", -			gmac_phy_input_parents, +	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,  			ARRAY_SIZE(gmac_phy_input_parents), 0,  			SPEAR1310_GMAC_CLK_CFG,  			SPEAR1310_GMAC_PHY_INPUT_CLK_SHIFT,  			SPEAR1310_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); +	clk_register_clkdev(clk, "phy_input_mclk", NULL); -	clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", -			"gmac_phy_input_mux_clk", 0, SPEAR1310_GMAC_CLK_SYNT, -			NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); -	clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); +	clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", +			0, SPEAR1310_GMAC_CLK_SYNT, NULL, gmac_rtbl, +			ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "phy_syn_clk", NULL); +	clk_register_clkdev(clk1, "phy_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, +	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,  			ARRAY_SIZE(gmac_phy_parents), 0,  			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GMAC_PHY_CLK_SHIFT,  			SPEAR1310_GMAC_PHY_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, NULL, "stmmacphy.0");  	/* clcd */ -	clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, +	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,  			ARRAY_SIZE(clcd_synth_parents), 0,  			SPEAR1310_CLCD_CLK_SYNT, SPEAR1310_CLCD_SYNT_CLK_SHIFT,  			SPEAR1310_CLCD_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); +	clk_register_clkdev(clk, "clcd_syn_mclk", NULL); -	clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, +	clk = 
clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0,  			SPEAR1310_CLCD_CLK_SYNT, clcd_rtbl,  			ARRAY_SIZE(clcd_rtbl), &_lock); -	clk_register_clkdev(clk, "clcd_synth_clk", NULL); +	clk_register_clkdev(clk, "clcd_syn_clk", NULL); -	clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, +	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,  			ARRAY_SIZE(clcd_pixel_parents), 0,  			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_CLCD_CLK_SHIFT,  			SPEAR1310_CLCD_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, "clcd_pixel_clk", NULL); -	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, +	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CLCD_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, "clcd_clk", NULL);  	/* i2s */ -	clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, +	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,  			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1310_I2S_CLK_CFG,  			SPEAR1310_I2S_SRC_CLK_SHIFT, SPEAR1310_I2S_SRC_CLK_MASK,  			0, &_lock);  	clk_register_clkdev(clk, "i2s_src_clk", NULL); -	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, +	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0,  			SPEAR1310_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl,  			ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL);  	clk_register_clkdev(clk, "i2s_prs1_clk", NULL); -	clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, +	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,  			ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1310_I2S_CLK_CFG,  			SPEAR1310_I2S_REF_SHIFT, SPEAR1310_I2S_REF_SEL_MASK, 0,  			&_lock);  	clk_register_clkdev(clk, "i2s_ref_clk", NULL); -	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0,  			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_I2S_REF_PAD_CLK_ENB,  			0, &_lock);  	clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); -	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", +	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk",  			"i2s_ref_pad_clk", 0, SPEAR1310_I2S_CLK_CFG,  			&i2s_sclk_masks, i2s_sclk_rtbl,  			ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1);  	clk_register_clkdev(clk, "i2s_sclk_clk", NULL); -	clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); +	clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL);  	/* clock derived from ahb clk */  	clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -747,13 +745,13 @@ void __init spear1310_clk_init(void)  			&_lock);  	clk_register_clkdev(clk, "sysram1_clk", NULL); -	clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", +	clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk",  			0, SPEAR1310_ADC_CLK_SYNT, NULL, adc_rtbl,  			ARRAY_SIZE(adc_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "adc_synth_clk", NULL); -	clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "adc_syn_clk", NULL); +	clk_register_clkdev(clk1, "adc_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0,  			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_ADC_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "adc_clk"); @@ -790,37 +788,37 @@ void __init spear1310_clk_init(void)  	clk_register_clkdev(clk, NULL, "e0300000.kbd");  	/* RAS clks */ -	clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", -			gen_synth0_1_parents, 
ARRAY_SIZE(gen_synth0_1_parents), -			0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT0_1_CLK_SHIFT, +	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, +			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1310_PLL_CFG, +			SPEAR1310_RAS_SYNT0_1_CLK_SHIFT,  			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); +	clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); -	clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", -			gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), -			0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT2_3_CLK_SHIFT, +	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, +			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1310_PLL_CFG, +			SPEAR1310_RAS_SYNT2_3_CLK_SHIFT,  			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); +	clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); -	clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, +	clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0,  			SPEAR1310_RAS_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth0_clk", NULL); +	clk_register_clkdev(clk, "gen_syn0_clk", NULL); -	clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, +	clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0,  			SPEAR1310_RAS_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth1_clk", NULL); +	clk_register_clkdev(clk, "gen_syn1_clk", NULL); -	clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, +	clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0,  			SPEAR1310_RAS_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth2_clk", NULL); +	clk_register_clkdev(clk, "gen_syn2_clk", NULL); -	clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, +	clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0,  			SPEAR1310_RAS_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth3_clk", NULL); +	clk_register_clkdev(clk, "gen_syn3_clk", NULL);  	clk = clk_register_gate(NULL, "ras_osc_24m_clk", "osc_24m_clk", 0,  			SPEAR1310_RAS_CLK_ENB, SPEAR1310_OSC_24M_CLK_ENB, 0, @@ -847,7 +845,7 @@ void __init spear1310_clk_init(void)  			&_lock);  	clk_register_clkdev(clk, "ras_pll3_clk", NULL); -	clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_125m_pad_clk", 0, +	clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_pad_clk", 0,  			SPEAR1310_RAS_CLK_ENB, SPEAR1310_C125M_PAD_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, "ras_tx125_clk", NULL); @@ -912,7 +910,7 @@ void __init spear1310_clk_init(void)  			&_lock);  	clk_register_clkdev(clk, NULL, "5c700000.eth"); -	clk = clk_register_mux(NULL, "smii_rgmii_phy_mux_clk", +	clk = clk_register_mux(NULL, "smii_rgmii_phy_mclk",  			smii_rgmii_phy_parents,  			ARRAY_SIZE(smii_rgmii_phy_parents), 0,  			SPEAR1310_RAS_CTRL_REG1, @@ -922,184 +920,184 @@ void __init spear1310_clk_init(void)  	clk_register_clkdev(clk, NULL, "stmmacphy.2");  	clk_register_clkdev(clk, NULL, "stmmacphy.4"); -	clk = clk_register_mux(NULL, "rmii_phy_mux_clk", rmii_phy_parents, +	clk = clk_register_mux(NULL, "rmii_phy_mclk", rmii_phy_parents,  			ARRAY_SIZE(rmii_phy_parents), 0,  			SPEAR1310_RAS_CTRL_REG1, SPEAR1310_RMII_PHY_CLK_SHIFT,  			SPEAR1310_PHY_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, NULL, "stmmacphy.3"); -	clk = clk_register_mux(NULL, "uart1_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, 
"uart1_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_UART1_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,  			0, &_lock); -	clk_register_clkdev(clk, "uart1_mux_clk", NULL); +	clk_register_clkdev(clk, "uart1_mclk", NULL); -	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5c800000.serial"); -	clk = clk_register_mux(NULL, "uart2_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, "uart2_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_UART2_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,  			0, &_lock); -	clk_register_clkdev(clk, "uart2_mux_clk", NULL); +	clk_register_clkdev(clk, "uart2_mclk", NULL); -	clk = clk_register_gate(NULL, "uart2_clk", "uart2_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart2_clk", "uart2_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5c900000.serial"); -	clk = clk_register_mux(NULL, "uart3_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, "uart3_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_UART3_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,  			0, &_lock); -	clk_register_clkdev(clk, "uart3_mux_clk", NULL); +	clk_register_clkdev(clk, "uart3_mclk", NULL); -	clk = clk_register_gate(NULL, "uart3_clk", "uart3_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart3_clk", "uart3_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5ca00000.serial"); -	clk = clk_register_mux(NULL, "uart4_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, "uart4_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_UART4_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,  			0, &_lock); -	clk_register_clkdev(clk, "uart4_mux_clk", NULL); +	clk_register_clkdev(clk, "uart4_mclk", NULL); -	clk = clk_register_gate(NULL, "uart4_clk", "uart4_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart4_clk", "uart4_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART4_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5cb00000.serial"); -	clk = clk_register_mux(NULL, "uart5_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, "uart5_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_UART5_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,  			0, &_lock); -	clk_register_clkdev(clk, "uart5_mux_clk", NULL); +	clk_register_clkdev(clk, "uart5_mclk", NULL); -	clk = clk_register_gate(NULL, "uart5_clk", "uart5_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart5_clk", "uart5_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART5_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5cc00000.serial"); -	clk = clk_register_mux(NULL, "i2c1_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c1_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C1_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c1_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c1_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5cd00000.i2c"); -	clk = clk_register_mux(NULL, 
"i2c2_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c2_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C2_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c2_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c2_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5ce00000.i2c"); -	clk = clk_register_mux(NULL, "i2c3_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c3_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C3_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c3_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c3_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5cf00000.i2c"); -	clk = clk_register_mux(NULL, "i2c4_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c4_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C4_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c4_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c4_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C4_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5d000000.i2c"); -	clk = clk_register_mux(NULL, "i2c5_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c5_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C5_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c5_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c5_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C5_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5d100000.i2c"); -	clk = clk_register_mux(NULL, "i2c6_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c6_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C6_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c6_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c6_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C6_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5d200000.i2c"); -	clk = clk_register_mux(NULL, "i2c7_mux_clk", i2c_parents, +	clk = clk_register_mux(NULL, "i2c7_mclk", i2c_parents,  			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_I2C7_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "i2c7_mux_clk", NULL); +	clk_register_clkdev(clk, "i2c7_mclk", NULL); -	clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C7_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5d300000.i2c"); -	clk = clk_register_mux(NULL, "ssp1_mux_clk", ssp1_parents, +	clk = 
clk_register_mux(NULL, "ssp1_mclk", ssp1_parents,  			ARRAY_SIZE(ssp1_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_SSP1_CLK_SHIFT, SPEAR1310_SSP1_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "ssp1_mux_clk", NULL); +	clk_register_clkdev(clk, "ssp1_mclk", NULL); -	clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mux_clk", 0, +	clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_SSP1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "5d400000.spi"); -	clk = clk_register_mux(NULL, "pci_mux_clk", pci_parents, +	clk = clk_register_mux(NULL, "pci_mclk", pci_parents,  			ARRAY_SIZE(pci_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_PCI_CLK_SHIFT, SPEAR1310_PCI_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "pci_mux_clk", NULL); +	clk_register_clkdev(clk, "pci_mclk", NULL); -	clk = clk_register_gate(NULL, "pci_clk", "pci_mux_clk", 0, +	clk = clk_register_gate(NULL, "pci_clk", "pci_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_PCI_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "pci"); -	clk = clk_register_mux(NULL, "tdm1_mux_clk", tdm_parents, +	clk = clk_register_mux(NULL, "tdm1_mclk", tdm_parents,  			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_TDM1_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "tdm1_mux_clk", NULL); +	clk_register_clkdev(clk, "tdm1_mclk", NULL); -	clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mux_clk", 0, +	clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "tdm_hdlc.0"); -	clk = clk_register_mux(NULL, "tdm2_mux_clk", tdm_parents, +	clk = clk_register_mux(NULL, "tdm2_mclk", tdm_parents,  			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,  			SPEAR1310_TDM2_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "tdm2_mux_clk", NULL); +	clk_register_clkdev(clk, "tdm2_mclk", NULL); -	clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mux_clk", 0, +	clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mclk", 0,  			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "tdm_hdlc.1"); diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index e3ea7216223..2352cee7f64 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -369,27 +369,25 @@ static struct frac_rate_tbl gen_rtbl[] = {  /* clock parents */  static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; -static const char *sys_parents[] = { "none", "pll1_clk", "none", "none", -	"sys_synth_clk", "none", "pll2_clk", "pll3_clk", }; -static const char *ahb_parents[] = { "cpu_div3_clk", "amba_synth_clk", }; +static const char *sys_parents[] = { "pll1_clk", "pll1_clk", "pll1_clk", +	"pll1_clk", "sys_synth_clk", "sys_synth_clk", "pll2_clk", "pll3_clk", }; +static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", };  static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", };  static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk", -	"uart0_synth_gate_clk", }; +	"uart0_syn_gclk", };  static const char *uart1_parents[] = { "pll5_clk", "osc_24m_clk", -	"uart1_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", +	"uart1_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", 
"c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk",  	"osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", -	"gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", };  static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", };  static const char *i2s_src_parents[] = { "vco1div2_clk", "pll2_clk", "pll3_clk",  	"i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; -static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_synth2_clk", -}; -static const char *spdif_in_parents[] = { "pll2_clk", "gen_synth3_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", }; +static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_syn2_clk", }; +static const char *spdif_in_parents[] = { "pll2_clk", "gen_syn3_clk", };  static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk",  	"pll3_clk", }; @@ -415,9 +413,9 @@ void __init spear1340_clk_init(void)  			25000000);  	clk_register_clkdev(clk, "osc_25m_clk", NULL); -	clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, -			CLK_IS_ROOT, 125000000); -	clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); +	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, +			125000000); +	clk_register_clkdev(clk, "gmii_pad_clk", NULL);  	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,  			CLK_IS_ROOT, 12288000); @@ -431,35 +429,35 @@ void __init spear1340_clk_init(void)  	/* clock derived from 24 or 25 MHz osc clk */  	/* vco-pll */ -	clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,  			SPEAR1340_PLL1_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "vco1_mux_clk", NULL); -	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", -			0, SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl, +	clk_register_clkdev(clk, "vco1_mclk", NULL); +	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0, +			SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco1_clk", NULL);  	clk_register_clkdev(clk1, "pll1_clk", NULL); -	clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,  			SPEAR1340_PLL2_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "vco2_mux_clk", NULL); -	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", -			0, SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl, +	clk_register_clkdev(clk, "vco2_mclk", NULL); +	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0, +			SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco2_clk", NULL);  	clk_register_clkdev(clk1, "pll2_clk", NULL); -	clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, +	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,  			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,  			SPEAR1340_PLL3_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,  			&_lock); -	
clk_register_clkdev(clk, "vco3_mux_clk", NULL); -	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", -			0, SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl, +	clk_register_clkdev(clk, "vco3_mclk", NULL); +	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0, +			SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl,  			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco3_clk", NULL);  	clk_register_clkdev(clk1, "pll3_clk", NULL); @@ -498,7 +496,7 @@ void __init spear1340_clk_init(void)  	/* peripherals */  	clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1,  			128); -	clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, +	clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0,  			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_THSENS_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -509,23 +507,23 @@ void __init spear1340_clk_init(void)  	clk_register_clkdev(clk, "ddr_clk", NULL);  	/* clock derived from pll1 clk */ -	clk = clk_register_frac("sys_synth_clk", "vco1div2_clk", 0, +	clk = clk_register_frac("sys_syn_clk", "vco1div2_clk", 0,  			SPEAR1340_SYS_CLK_SYNT, sys_synth_rtbl,  			ARRAY_SIZE(sys_synth_rtbl), &_lock); -	clk_register_clkdev(clk, "sys_synth_clk", NULL); +	clk_register_clkdev(clk, "sys_syn_clk", NULL); -	clk = clk_register_frac("amba_synth_clk", "vco1div2_clk", 0, +	clk = clk_register_frac("amba_syn_clk", "vco1div2_clk", 0,  			SPEAR1340_AMBA_CLK_SYNT, amba_synth_rtbl,  			ARRAY_SIZE(amba_synth_rtbl), &_lock); -	clk_register_clkdev(clk, "amba_synth_clk", NULL); +	clk_register_clkdev(clk, "amba_syn_clk", NULL); -	clk = clk_register_mux(NULL, "sys_mux_clk", sys_parents, +	clk = clk_register_mux(NULL, "sys_mclk", sys_parents,  			ARRAY_SIZE(sys_parents), 0, SPEAR1340_SYS_CLK_CTRL,  			SPEAR1340_SCLK_SRC_SEL_SHIFT,  			SPEAR1340_SCLK_SRC_SEL_MASK, 0, &_lock);  	clk_register_clkdev(clk, "sys_clk", NULL); -	clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mux_clk", 0, 1, +	clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mclk", 0, 1,  			2);  	clk_register_clkdev(clk, "cpu_clk", NULL); @@ -548,194 +546,193 @@ void __init spear1340_clk_init(void)  	clk_register_clkdev(clk, "apb_clk", NULL);  	/* gpt clocks */ -	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_GPT0_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt0_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, +	clk_register_clkdev(clk, "gpt0_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT0_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt0"); -	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_GPT1_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt1_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, +	clk_register_clkdev(clk, "gpt1_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt1"); -	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt2_mclk", 
gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_GPT2_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt2_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, +	clk_register_clkdev(clk, "gpt2_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,  			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt2"); -	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, +	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,  			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_GPT3_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gpt3_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, +	clk_register_clkdev(clk, "gpt3_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,  			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "gpt3");  	/* others */ -	clk = clk_register_aux("uart0_synth_clk", "uart0_synth_gate_clk", +	clk = clk_register_aux("uart0_syn_clk", "uart0_syn_gclk",  			"vco1div2_clk", 0, SPEAR1340_UART0_CLK_SYNT, NULL,  			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "uart0_synth_clk", NULL); -	clk_register_clkdev(clk1, "uart0_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "uart0_syn_clk", NULL); +	clk_register_clkdev(clk1, "uart0_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, +	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,  			ARRAY_SIZE(uart0_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_UART0_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "uart0_mux_clk", NULL); +	clk_register_clkdev(clk, "uart0_mclk", NULL); -	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, +	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART0_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "e0000000.serial"); -	clk = clk_register_aux("uart1_synth_clk", "uart1_synth_gate_clk", +	clk = clk_register_aux("uart1_syn_clk", "uart1_syn_gclk",  			"vco1div2_clk", 0, SPEAR1340_UART1_CLK_SYNT, NULL,  			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "uart1_synth_clk", NULL); -	clk_register_clkdev(clk1, "uart1_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "uart1_syn_clk", NULL); +	clk_register_clkdev(clk1, "uart1_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "uart1_mux_clk", uart1_parents, +	clk = clk_register_mux(NULL, "uart1_mclk", uart1_parents,  			ARRAY_SIZE(uart1_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_UART1_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "uart1_mux_clk", NULL); +	clk_register_clkdev(clk, "uart1_mclk", NULL); -	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, -			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, +	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, +			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "b4100000.serial"); -	clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", +	clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk",  			"vco1div2_clk", 0, SPEAR1340_SDHCI_CLK_SYNT, NULL,  			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "sdhci_synth_clk", NULL); -	clk_register_clkdev(clk1, 
"sdhci_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "sdhci_syn_clk", NULL); +	clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_SDHCI_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "b3000000.sdhci"); -	clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", -			"vco1div2_clk", 0, SPEAR1340_CFXD_CLK_SYNT, NULL, -			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "cfxd_synth_clk", NULL); -	clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); +	clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", +			0, SPEAR1340_CFXD_CLK_SYNT, NULL, aux_rtbl, +			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "cfxd_syn_clk", NULL); +	clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CFXD_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "b2800000.cf");  	clk_register_clkdev(clk, NULL, "arasan_xd"); -	clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", -			"vco1div2_clk", 0, SPEAR1340_C3_CLK_SYNT, NULL, -			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "c3_synth_clk", NULL); -	clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); +	clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", 0, +			SPEAR1340_C3_CLK_SYNT, NULL, aux_rtbl, +			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "c3_syn_clk", NULL); +	clk_register_clkdev(clk1, "c3_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, +	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,  			ARRAY_SIZE(c3_parents), 0, SPEAR1340_PERIP_CLK_CFG,  			SPEAR1340_C3_CLK_SHIFT, SPEAR1340_C3_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "c3_mux_clk", NULL); +	clk_register_clkdev(clk, "c3_mclk", NULL); -	clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, +	clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_C3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "c3");  	/* gmac */ -	clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", -			gmac_phy_input_parents, +	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,  			ARRAY_SIZE(gmac_phy_input_parents), 0,  			SPEAR1340_GMAC_CLK_CFG,  			SPEAR1340_GMAC_PHY_INPUT_CLK_SHIFT,  			SPEAR1340_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); +	clk_register_clkdev(clk, "phy_input_mclk", NULL); -	clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", -			"gmac_phy_input_mux_clk", 0, SPEAR1340_GMAC_CLK_SYNT, -			NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); -	clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); +	clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", +			0, SPEAR1340_GMAC_CLK_SYNT, NULL, gmac_rtbl, +			ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); +	clk_register_clkdev(clk, "phy_syn_clk", NULL); +	clk_register_clkdev(clk1, "phy_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, +	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,  			ARRAY_SIZE(gmac_phy_parents), 0,  			SPEAR1340_PERIP_CLK_CFG, 
SPEAR1340_GMAC_PHY_CLK_SHIFT,  			SPEAR1340_GMAC_PHY_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, NULL, "stmmacphy.0");  	/* clcd */ -	clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, +	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,  			ARRAY_SIZE(clcd_synth_parents), 0,  			SPEAR1340_CLCD_CLK_SYNT, SPEAR1340_CLCD_SYNT_CLK_SHIFT,  			SPEAR1340_CLCD_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); +	clk_register_clkdev(clk, "clcd_syn_mclk", NULL); -	clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, +	clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0,  			SPEAR1340_CLCD_CLK_SYNT, clcd_rtbl,  			ARRAY_SIZE(clcd_rtbl), &_lock); -	clk_register_clkdev(clk, "clcd_synth_clk", NULL); +	clk_register_clkdev(clk, "clcd_syn_clk", NULL); -	clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, +	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,  			ARRAY_SIZE(clcd_pixel_parents), 0,  			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_CLCD_CLK_SHIFT,  			SPEAR1340_CLCD_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, "clcd_pixel_clk", NULL); -	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, +	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CLCD_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, "clcd_clk", NULL);  	/* i2s */ -	clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, +	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,  			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1340_I2S_CLK_CFG,  			SPEAR1340_I2S_SRC_CLK_SHIFT, SPEAR1340_I2S_SRC_CLK_MASK,  			0, &_lock);  	clk_register_clkdev(clk, "i2s_src_clk", NULL); -	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, +	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0,  			SPEAR1340_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl,  			ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL);  	clk_register_clkdev(clk, "i2s_prs1_clk", NULL); -	clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, +	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,  			ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1340_I2S_CLK_CFG,  			SPEAR1340_I2S_REF_SHIFT, SPEAR1340_I2S_REF_SEL_MASK, 0,  			&_lock);  	clk_register_clkdev(clk, "i2s_ref_clk", NULL); -	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, +	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0,  			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_I2S_REF_PAD_CLK_ENB,  			0, &_lock);  	clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); -	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", -			"i2s_ref_mux_clk", 0, SPEAR1340_I2S_CLK_CFG, -			&i2s_sclk_masks, i2s_sclk_rtbl, -			ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1); +	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_mclk", +			0, SPEAR1340_I2S_CLK_CFG, &i2s_sclk_masks, +			i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock, +			&clk1);  	clk_register_clkdev(clk, "i2s_sclk_clk", NULL); -	clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); +	clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL);  	/* clock derived from ahb clk */  	clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -744,7 +741,7 @@ void __init spear1340_clk_init(void)  	clk_register_clkdev(clk, NULL, "e0280000.i2c");  	clk = clk_register_gate(NULL, "i2c1_clk", "ahb_clk", 0, -			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0, +			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0,  			&_lock);  	
clk_register_clkdev(clk, NULL, "b4000000.i2c"); @@ -800,13 +797,13 @@ void __init spear1340_clk_init(void)  			&_lock);  	clk_register_clkdev(clk, "sysram1_clk", NULL); -	clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", +	clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk",  			0, SPEAR1340_ADC_CLK_SYNT, NULL, adc_rtbl,  			ARRAY_SIZE(adc_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "adc_synth_clk", NULL); -	clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "adc_syn_clk", NULL); +	clk_register_clkdev(clk1, "adc_syn_gclk", NULL); -	clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, +	clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0,  			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_ADC_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "adc_clk"); @@ -843,39 +840,39 @@ void __init spear1340_clk_init(void)  	clk_register_clkdev(clk, NULL, "e0300000.kbd");  	/* RAS clks */ -	clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", -			gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents), -			0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT0_1_CLK_SHIFT, +	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, +			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1340_PLL_CFG, +			SPEAR1340_GEN_SYNT0_1_CLK_SHIFT,  			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); +	clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); -	clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", -			gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), -			0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT2_3_CLK_SHIFT, +	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, +			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1340_PLL_CFG, +			SPEAR1340_GEN_SYNT2_3_CLK_SHIFT,  			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); +	clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); -	clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, +	clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0,  			SPEAR1340_GEN_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth0_clk", NULL); +	clk_register_clkdev(clk, "gen_syn0_clk", NULL); -	clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, +	clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0,  			SPEAR1340_GEN_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth1_clk", NULL); +	clk_register_clkdev(clk, "gen_syn1_clk", NULL); -	clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, +	clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0,  			SPEAR1340_GEN_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth2_clk", NULL); +	clk_register_clkdev(clk, "gen_syn2_clk", NULL); -	clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, +	clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0,  			SPEAR1340_GEN_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl),  			&_lock); -	clk_register_clkdev(clk, "gen_synth3_clk", NULL); +	clk_register_clkdev(clk, "gen_syn3_clk", NULL); -	clk = clk_register_gate(NULL, "mali_clk", "gen_synth3_clk", 0, +	clk = clk_register_gate(NULL, "mali_clk", "gen_syn3_clk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_MALI_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "mali"); @@ -890,74 +887,74 @@ void __init spear1340_clk_init(void)  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_cec.1"); -	
clk = clk_register_mux(NULL, "spdif_out_mux_clk", spdif_out_parents, +	clk = clk_register_mux(NULL, "spdif_out_mclk", spdif_out_parents,  			ARRAY_SIZE(spdif_out_parents), 0,  			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_OUT_CLK_SHIFT,  			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "spdif_out_mux_clk", NULL); +	clk_register_clkdev(clk, "spdif_out_mclk", NULL); -	clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mux_clk", 0, +	clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_OUT_CLK_ENB,  			0, &_lock);  	clk_register_clkdev(clk, NULL, "spdif-out"); -	clk = clk_register_mux(NULL, "spdif_in_mux_clk", spdif_in_parents, +	clk = clk_register_mux(NULL, "spdif_in_mclk", spdif_in_parents,  			ARRAY_SIZE(spdif_in_parents), 0,  			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_IN_CLK_SHIFT,  			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "spdif_in_mux_clk", NULL); +	clk_register_clkdev(clk, "spdif_in_mclk", NULL); -	clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mux_clk", 0, +	clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_IN_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spdif-in"); -	clk = clk_register_gate(NULL, "acp_clk", "acp_mux_clk", 0, +	clk = clk_register_gate(NULL, "acp_clk", "acp_mclk", 0,  			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_ACP_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "acp_clk"); -	clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mux_clk", 0, +	clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PLGPIO_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "plgpio"); -	clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mux_clk", 0, +	clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_DEC_CLK_ENB,  			0, &_lock);  	clk_register_clkdev(clk, NULL, "video_dec"); -	clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mux_clk", 0, +	clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_ENC_CLK_ENB,  			0, &_lock);  	clk_register_clkdev(clk, NULL, "video_enc"); -	clk = clk_register_gate(NULL, "video_in_clk", "video_in_mux_clk", 0, +	clk = clk_register_gate(NULL, "video_in_clk", "video_in_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_IN_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_vip"); -	clk = clk_register_gate(NULL, "cam0_clk", "cam0_mux_clk", 0, +	clk = clk_register_gate(NULL, "cam0_clk", "cam0_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM0_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_camif.0"); -	clk = clk_register_gate(NULL, "cam1_clk", "cam1_mux_clk", 0, +	clk = clk_register_gate(NULL, "cam1_clk", "cam1_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM1_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_camif.1"); -	clk = clk_register_gate(NULL, "cam2_clk", "cam2_mux_clk", 0, +	clk = clk_register_gate(NULL, "cam2_clk", "cam2_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM2_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_camif.2"); -	clk = clk_register_gate(NULL, "cam3_clk", "cam3_mux_clk", 0, +	clk = clk_register_gate(NULL, "cam3_clk", "cam3_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM3_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "spear_camif.3"); -	clk = 
clk_register_gate(NULL, "pwm_clk", "pwm_mux_clk", 0, +	clk = clk_register_gate(NULL, "pwm_clk", "pwm_mclk", 0,  			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PWM_CLK_ENB, 0,  			&_lock);  	clk_register_clkdev(clk, NULL, "pwm"); diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index 01dd6daff2a..c3157454bb3 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -122,12 +122,12 @@ static struct gpt_rate_tbl gpt_rtbl[] = {  };  /* clock parents */ -static const char *uart0_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", +static const char *uart0_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk",  }; -static const char *gpt0_parents[] = { "pll3_48m_clk", "gpt0_synth_clk", }; -static const char *gpt1_parents[] = { "pll3_48m_clk", "gpt1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; +static const char *gpt0_parents[] = { "pll3_clk", "gpt0_syn_clk", }; +static const char *gpt1_parents[] = { "pll3_clk", "gpt1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", };  static const char *gen2_3_parents[] = { "pll1_clk", "pll2_clk", };  static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none",  	"pll2_clk", }; @@ -137,7 +137,7 @@ static void __init spear300_clk_init(void)  {  	struct clk *clk; -	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, +	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,  			1, 1);  	clk_register_clkdev(clk, NULL, "60000000.clcd"); @@ -219,15 +219,11 @@ static void __init spear310_clk_init(void)  	#define SPEAR320_UARTX_PCLK_VAL_SYNTH1		0x0  	#define SPEAR320_UARTX_PCLK_VAL_APB		0x1 -static const char *i2s_ref_parents[] = { "ras_pll2_clk", -	"ras_gen2_synth_gate_clk", }; -static const char *sdhci_parents[] = { "ras_pll3_48m_clk", -	"ras_gen3_synth_gate_clk", -}; +static const char *i2s_ref_parents[] = { "ras_pll2_clk", "ras_syn2_gclk", }; +static const char *sdhci_parents[] = { "ras_pll3_clk", "ras_syn3_gclk", };  static const char *smii0_parents[] = { "smii_125m_pad", "ras_pll2_clk", -	"ras_gen0_synth_gate_clk", }; -static const char *uartx_parents[] = { "ras_gen1_synth_gate_clk", "ras_apb_clk", -}; +	"ras_syn0_gclk", }; +static const char *uartx_parents[] = { "ras_syn1_gclk", "ras_apb_clk", };  static void __init spear320_clk_init(void)  { @@ -237,7 +233,7 @@ static void __init spear320_clk_init(void)  			CLK_IS_ROOT, 125000000);  	clk_register_clkdev(clk, "smii_125m_pad", NULL); -	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, +	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,  			1, 1);  	clk_register_clkdev(clk, NULL, "90000000.clcd"); @@ -363,9 +359,9 @@ void __init spear3xx_clk_init(void)  	clk_register_clkdev(clk, NULL, "fc900000.rtc");  	/* clock derived from 24 MHz osc clk */ -	clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, +	clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0,  			48000000); -	clk_register_clkdev(clk, "pll3_48m_clk", NULL); +	clk_register_clkdev(clk, "pll3_clk", NULL);  	clk = clk_register_fixed_factor(NULL, "wdt_clk", "osc_24m_clk", 0, 1,  			1); @@ -392,98 +388,98 @@ void __init spear3xx_clk_init(void)  			HCLK_RATIO_MASK, 0, &_lock);  	clk_register_clkdev(clk, "ahb_clk", NULL); -	clk = clk_register_aux("uart_synth_clk", 
"uart_synth_gate_clk", -			"pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "uart_synth_clk", NULL); -	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); +	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, +			UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "uart_syn_clk", NULL); +	clk_register_clkdev(clk1, "uart_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, +	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,  			ARRAY_SIZE(uart0_parents), 0, PERIP_CLK_CFG,  			UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "uart0_mux_clk", NULL); +	clk_register_clkdev(clk, "uart0_mclk", NULL); -	clk = clk_register_gate(NULL, "uart0", "uart0_mux_clk", 0, -			PERIP1_CLK_ENB, UART_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "uart0", "uart0_mclk", 0, PERIP1_CLK_ENB, +			UART_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "d0000000.serial"); -	clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", -			"pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "firda_synth_clk", NULL); -	clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); +	clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", 0, +			FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "firda_syn_clk", NULL); +	clk_register_clkdev(clk1, "firda_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, +	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,  			ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG,  			FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "firda_mux_clk", NULL); +	clk_register_clkdev(clk, "firda_mclk", NULL); -	clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, +	clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0,  			PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "firda");  	/* gpt clocks */ -	clk_register_gpt("gpt0_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, -			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); +	clk_register_gpt("gpt0_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl, +			ARRAY_SIZE(gpt_rtbl), &_lock);  	clk = clk_register_mux(NULL, "gpt0_clk", gpt0_parents,  			ARRAY_SIZE(gpt0_parents), 0, PERIP_CLK_CFG,  			GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt0"); -	clk_register_gpt("gpt1_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, -			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); -	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt1_parents, +	clk_register_gpt("gpt1_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl, +			ARRAY_SIZE(gpt_rtbl), &_lock); +	clk = clk_register_mux(NULL, "gpt1_mclk", gpt1_parents,  			ARRAY_SIZE(gpt1_parents), 0, PERIP_CLK_CFG,  			GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gpt1_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, +	clk_register_clkdev(clk, "gpt1_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,  			PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt1"); -	clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, -			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); -	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, +	clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl, +			
ARRAY_SIZE(gpt_rtbl), &_lock); +	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,  			ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG,  			GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gpt2_mux_clk", NULL); -	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, +	clk_register_clkdev(clk, "gpt2_mclk", NULL); +	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,  			PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt2");  	/* general synths clocks */ -	clk = clk_register_aux("gen0_synth_clk", "gen0_synth_gate_clk", -			"pll1_clk", 0, GEN0_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gen0_synth_clk", NULL); -	clk_register_clkdev(clk1, "gen0_synth_gate_clk", NULL); +	clk = clk_register_aux("gen0_syn_clk", "gen0_syn_gclk", "pll1_clk", +			0, GEN0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "gen0_syn_clk", NULL); +	clk_register_clkdev(clk1, "gen0_syn_gclk", NULL); -	clk = clk_register_aux("gen1_synth_clk", "gen1_synth_gate_clk", -			"pll1_clk", 0, GEN1_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gen1_synth_clk", NULL); -	clk_register_clkdev(clk1, "gen1_synth_gate_clk", NULL); +	clk = clk_register_aux("gen1_syn_clk", "gen1_syn_gclk", "pll1_clk", +			0, GEN1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "gen1_syn_clk", NULL); +	clk_register_clkdev(clk1, "gen1_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "gen2_3_parent_clk", gen2_3_parents, +	clk = clk_register_mux(NULL, "gen2_3_par_clk", gen2_3_parents,  			ARRAY_SIZE(gen2_3_parents), 0, CORE_CLK_CFG,  			GEN_SYNTH2_3_CLK_SHIFT, GEN_SYNTH2_3_CLK_MASK, 0,  			&_lock); -	clk_register_clkdev(clk, "gen2_3_parent_clk", NULL); +	clk_register_clkdev(clk, "gen2_3_par_clk", NULL); -	clk = clk_register_aux("gen2_synth_clk", "gen2_synth_gate_clk", -			"gen2_3_parent_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl, +	clk = clk_register_aux("gen2_syn_clk", "gen2_syn_gclk", +			"gen2_3_par_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl,  			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gen2_synth_clk", NULL); -	clk_register_clkdev(clk1, "gen2_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "gen2_syn_clk", NULL); +	clk_register_clkdev(clk1, "gen2_syn_gclk", NULL); -	clk = clk_register_aux("gen3_synth_clk", "gen3_synth_gate_clk", -			"gen2_3_parent_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl, +	clk = clk_register_aux("gen3_syn_clk", "gen3_syn_gclk", +			"gen2_3_par_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl,  			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "gen3_synth_clk", NULL); -	clk_register_clkdev(clk1, "gen3_synth_gate_clk", NULL); +	clk_register_clkdev(clk, "gen3_syn_clk", NULL); +	clk_register_clkdev(clk1, "gen3_syn_gclk", NULL);  	/* clock derived from pll3 clk */ -	clk = clk_register_gate(NULL, "usbh_clk", "pll3_48m_clk", 0, -			PERIP1_CLK_ENB, USBH_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "usbh_clk", "pll3_clk", 0, PERIP1_CLK_ENB, +			USBH_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, "usbh_clk", NULL);  	clk = clk_register_fixed_factor(NULL, "usbh.0_clk", "usbh_clk", 0, 1, @@ -494,8 +490,8 @@ void __init spear3xx_clk_init(void)  			1);  	clk_register_clkdev(clk, "usbh.1_clk", NULL); -	clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, -			PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, 
PERIP1_CLK_ENB, +			USBD_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "designware_udc");  	/* clock derived from ahb clk */ @@ -579,29 +575,25 @@ void __init spear3xx_clk_init(void)  			RAS_CLK_ENB, RAS_PLL2_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, "ras_pll2_clk", NULL); -	clk = clk_register_gate(NULL, "ras_pll3_48m_clk", "pll3_48m_clk", 0, +	clk = clk_register_gate(NULL, "ras_pll3_clk", "pll3_clk", 0,  			RAS_CLK_ENB, RAS_48M_CLK_ENB, 0, &_lock); -	clk_register_clkdev(clk, "ras_pll3_48m_clk", NULL); +	clk_register_clkdev(clk, "ras_pll3_clk", NULL); -	clk = clk_register_gate(NULL, "ras_gen0_synth_gate_clk", -			"gen0_synth_gate_clk", 0, RAS_CLK_ENB, -			RAS_SYNT0_CLK_ENB, 0, &_lock); -	clk_register_clkdev(clk, "ras_gen0_synth_gate_clk", NULL); +	clk = clk_register_gate(NULL, "ras_syn0_gclk", "gen0_syn_gclk", 0, +			RAS_CLK_ENB, RAS_SYNT0_CLK_ENB, 0, &_lock); +	clk_register_clkdev(clk, "ras_syn0_gclk", NULL); -	clk = clk_register_gate(NULL, "ras_gen1_synth_gate_clk", -			"gen1_synth_gate_clk", 0, RAS_CLK_ENB, -			RAS_SYNT1_CLK_ENB, 0, &_lock); -	clk_register_clkdev(clk, "ras_gen1_synth_gate_clk", NULL); +	clk = clk_register_gate(NULL, "ras_syn1_gclk", "gen1_syn_gclk", 0, +			RAS_CLK_ENB, RAS_SYNT1_CLK_ENB, 0, &_lock); +	clk_register_clkdev(clk, "ras_syn1_gclk", NULL); -	clk = clk_register_gate(NULL, "ras_gen2_synth_gate_clk", -			"gen2_synth_gate_clk", 0, RAS_CLK_ENB, -			RAS_SYNT2_CLK_ENB, 0, &_lock); -	clk_register_clkdev(clk, "ras_gen2_synth_gate_clk", NULL); +	clk = clk_register_gate(NULL, "ras_syn2_gclk", "gen2_syn_gclk", 0, +			RAS_CLK_ENB, RAS_SYNT2_CLK_ENB, 0, &_lock); +	clk_register_clkdev(clk, "ras_syn2_gclk", NULL); -	clk = clk_register_gate(NULL, "ras_gen3_synth_gate_clk", -			"gen3_synth_gate_clk", 0, RAS_CLK_ENB, -			RAS_SYNT3_CLK_ENB, 0, &_lock); -	clk_register_clkdev(clk, "ras_gen3_synth_gate_clk", NULL); +	clk = clk_register_gate(NULL, "ras_syn3_gclk", "gen3_syn_gclk", 0, +			RAS_CLK_ENB, RAS_SYNT3_CLK_ENB, 0, &_lock); +	clk_register_clkdev(clk, "ras_syn3_gclk", NULL);  	if (of_machine_is_compatible("st,spear300"))  		spear300_clk_init(); diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c index 61026ae564a..a98d0866f54 100644 --- a/drivers/clk/spear/spear6xx_clock.c +++ b/drivers/clk/spear/spear6xx_clock.c @@ -97,13 +97,12 @@ static struct aux_rate_tbl aux_rtbl[] = {  	{.xscale = 1, .yscale = 2, .eq = 1}, /* 166 MHz */  }; -static const char *clcd_parents[] = { "pll3_48m_clk", "clcd_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", -}; -static const char *uart_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *gpt0_1_parents[] = { "pll3_48m_clk", "gpt0_1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; -static const char *gpt3_parents[] = { "pll3_48m_clk", "gpt3_synth_clk", }; +static const char *clcd_parents[] = { "pll3_clk", "clcd_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", }; +static const char *uart_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *gpt0_1_parents[] = { "pll3_clk", "gpt0_1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", }; +static const char *gpt3_parents[] = { "pll3_clk", "gpt3_syn_clk", };  static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none",  	"pll2_clk", }; @@ -136,9 +135,9 @@ void __init spear6xx_clk_init(void)  	clk_register_clkdev(clk, NULL, "rtc-spear");  	/* clock derived from 
30 MHz osc clk */ -	clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, +	clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0,  			48000000); -	clk_register_clkdev(clk, "pll3_48m_clk", NULL); +	clk_register_clkdev(clk, "pll3_clk", NULL);  	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "osc_30m_clk",  			0, PLL1_CTR, PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), @@ -146,9 +145,9 @@ void __init spear6xx_clk_init(void)  	clk_register_clkdev(clk, "vco1_clk", NULL);  	clk_register_clkdev(clk1, "pll1_clk", NULL); -	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, -			"osc_30m_clk", 0, PLL2_CTR, PLL2_FRQ, pll_rtbl, -			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); +	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "osc_30m_clk", +			0, PLL2_CTR, PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), +			&_lock, &clk1, NULL);  	clk_register_clkdev(clk, "vco2_clk", NULL);  	clk_register_clkdev(clk1, "pll2_clk", NULL); @@ -165,111 +164,111 @@ void __init spear6xx_clk_init(void)  			HCLK_RATIO_MASK, 0, &_lock);  	clk_register_clkdev(clk, "ahb_clk", NULL); -	clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", -			"pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "uart_synth_clk", NULL); -	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); +	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, +			UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "uart_syn_clk", NULL); +	clk_register_clkdev(clk1, "uart_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "uart_mux_clk", uart_parents, +	clk = clk_register_mux(NULL, "uart_mclk", uart_parents,  			ARRAY_SIZE(uart_parents), 0, PERIP_CLK_CFG,  			UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "uart_mux_clk", NULL); +	clk_register_clkdev(clk, "uart_mclk", NULL); -	clk = clk_register_gate(NULL, "uart0", "uart_mux_clk", 0, -			PERIP1_CLK_ENB, UART0_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "uart0", "uart_mclk", 0, PERIP1_CLK_ENB, +			UART0_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "d0000000.serial"); -	clk = clk_register_gate(NULL, "uart1", "uart_mux_clk", 0, -			PERIP1_CLK_ENB, UART1_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "uart1", "uart_mclk", 0, PERIP1_CLK_ENB, +			UART1_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "d0080000.serial"); -	clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", -			"pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "firda_synth_clk", NULL); -	clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); +	clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", +			0, FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "firda_syn_clk", NULL); +	clk_register_clkdev(clk1, "firda_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, +	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,  			ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG,  			FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "firda_mux_clk", NULL); +	clk_register_clkdev(clk, "firda_mclk", NULL); -	clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, +	clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0,  			PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "firda"); -	clk = 
clk_register_aux("clcd_synth_clk", "clcd_synth_gate_clk", -			"pll1_clk", 0, CLCD_CLK_SYNT, NULL, aux_rtbl, -			ARRAY_SIZE(aux_rtbl), &_lock, &clk1); -	clk_register_clkdev(clk, "clcd_synth_clk", NULL); -	clk_register_clkdev(clk1, "clcd_synth_gate_clk", NULL); +	clk = clk_register_aux("clcd_syn_clk", "clcd_syn_gclk", "pll1_clk", +			0, CLCD_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), +			&_lock, &clk1); +	clk_register_clkdev(clk, "clcd_syn_clk", NULL); +	clk_register_clkdev(clk1, "clcd_syn_gclk", NULL); -	clk = clk_register_mux(NULL, "clcd_mux_clk", clcd_parents, +	clk = clk_register_mux(NULL, "clcd_mclk", clcd_parents,  			ARRAY_SIZE(clcd_parents), 0, PERIP_CLK_CFG,  			CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "clcd_mux_clk", NULL); +	clk_register_clkdev(clk, "clcd_mclk", NULL); -	clk = clk_register_gate(NULL, "clcd_clk", "clcd_mux_clk", 0, +	clk = clk_register_gate(NULL, "clcd_clk", "clcd_mclk", 0,  			PERIP1_CLK_ENB, CLCD_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "clcd");  	/* gpt clocks */ -	clk = clk_register_gpt("gpt0_1_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, +	clk = clk_register_gpt("gpt0_1_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG,  			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); -	clk_register_clkdev(clk, "gpt0_1_synth_clk", NULL); +	clk_register_clkdev(clk, "gpt0_1_syn_clk", NULL); -	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt0_1_parents, +	clk = clk_register_mux(NULL, "gpt0_mclk", gpt0_1_parents,  			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,  			GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt0"); -	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt0_1_parents, +	clk = clk_register_mux(NULL, "gpt1_mclk", gpt0_1_parents,  			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,  			GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gpt1_mux_clk", NULL); +	clk_register_clkdev(clk, "gpt1_mclk", NULL); -	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, +	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,  			PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt1"); -	clk = clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, +	clk = clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG,  			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); -	clk_register_clkdev(clk, "gpt2_synth_clk", NULL); +	clk_register_clkdev(clk, "gpt2_syn_clk", NULL); -	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, +	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,  			ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG,  			GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gpt2_mux_clk", NULL); +	clk_register_clkdev(clk, "gpt2_mclk", NULL); -	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, +	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,  			PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt2"); -	clk = clk_register_gpt("gpt3_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, +	clk = clk_register_gpt("gpt3_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG,  			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); -	clk_register_clkdev(clk, "gpt3_synth_clk", NULL); +	clk_register_clkdev(clk, "gpt3_syn_clk", NULL); -	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt3_parents, +	clk = clk_register_mux(NULL, "gpt3_mclk", gpt3_parents,  			ARRAY_SIZE(gpt3_parents), 0, PERIP_CLK_CFG,  			GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); -	clk_register_clkdev(clk, "gpt3_mux_clk", NULL); +	clk_register_clkdev(clk, "gpt3_mclk", NULL); -	
clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, +	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,  			PERIP1_CLK_ENB, GPT3_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "gpt3");  	/* clock derived from pll3 clk */ -	clk = clk_register_gate(NULL, "usbh0_clk", "pll3_48m_clk", 0, +	clk = clk_register_gate(NULL, "usbh0_clk", "pll3_clk", 0,  			PERIP1_CLK_ENB, USBH0_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "usbh.0_clk"); -	clk = clk_register_gate(NULL, "usbh1_clk", "pll3_48m_clk", 0, +	clk = clk_register_gate(NULL, "usbh1_clk", "pll3_clk", 0,  			PERIP1_CLK_ENB, USBH1_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "usbh.1_clk"); -	clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, -			PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); +	clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB, +			USBD_CLK_ENB, 0, &_lock);  	clk_register_clkdev(clk, NULL, "designware_udc");  	/* clock derived from ahb clk */ @@ -278,9 +277,8 @@ void __init spear6xx_clk_init(void)  	clk_register_clkdev(clk, "ahbmult2_clk", NULL);  	clk = clk_register_mux(NULL, "ddr_clk", ddr_parents, -			ARRAY_SIZE(ddr_parents), -			0, PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0, -			&_lock); +			ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT, +			MCTR_CLK_MASK, 0, &_lock);  	clk_register_clkdev(clk, "ddr_clk", NULL);  	clk = clk_register_divider(NULL, "apb_clk", "ahb_clk", diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 9764045428c..b7e7b49d8f6 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -78,21 +78,6 @@ static int cdv_backlight_combination_mode(struct drm_device *dev)  	return REG_READ(BLC_PWM_CTL2) & PWM_LEGACY_MODE;  } -static int cdv_get_brightness(struct backlight_device *bd) -{ -	struct drm_device *dev = bl_get_data(bd); -	u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; - -	if (cdv_backlight_combination_mode(dev)) { -		u8 lbpc; - -		val &= ~1; -		pci_read_config_byte(dev->pdev, 0xF4, &lbpc); -		val *= lbpc; -	} -	return val; -} -  static u32 cdv_get_max_backlight(struct drm_device *dev)  {  	u32 max = REG_READ(BLC_PWM_CTL); @@ -110,6 +95,22 @@ static u32 cdv_get_max_backlight(struct drm_device *dev)  	return max;  } +static int cdv_get_brightness(struct backlight_device *bd) +{ +	struct drm_device *dev = bl_get_data(bd); +	u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; + +	if (cdv_backlight_combination_mode(dev)) { +		u8 lbpc; + +		val &= ~1; +		pci_read_config_byte(dev->pdev, 0xF4, &lbpc); +		val *= lbpc; +	} +	return (val * 100)/cdv_get_max_backlight(dev); + +} +  static int cdv_set_brightness(struct backlight_device *bd)  {  	struct drm_device *dev = bl_get_data(bd); @@ -120,6 +121,9 @@ static int cdv_set_brightness(struct backlight_device *bd)  	if (level < 1)  		level = 1; +	level *= cdv_get_max_backlight(dev); +	level /= 100; +  	if (cdv_backlight_combination_mode(dev)) {  		u32 max = cdv_get_max_backlight(dev);  		u8 lbpc; @@ -157,7 +161,6 @@ static int cdv_backlight_init(struct drm_device *dev)  	cdv_backlight_device->props.brightness =  			cdv_get_brightness(cdv_backlight_device); -	cdv_backlight_device->props.max_brightness = cdv_get_max_backlight(dev);  	backlight_update_status(cdv_backlight_device);  	dev_priv->backlight_device = cdv_backlight_device;  	return 0; diff --git a/drivers/gpu/drm/gma500/opregion.c b/drivers/gpu/drm/gma500/opregion.c index 4f186eca3a3..c430bd42468 100644 --- 
a/drivers/gpu/drm/gma500/opregion.c +++ b/drivers/gpu/drm/gma500/opregion.c @@ -144,6 +144,8 @@ struct opregion_asle {  #define ASLE_CBLV_VALID         (1<<31) +static struct psb_intel_opregion *system_opregion; +  static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)  {  	struct drm_psb_private *dev_priv = dev->dev_private; @@ -205,7 +207,7 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev)  	struct drm_psb_private *dev_priv = dev->dev_private;  	struct opregion_asle *asle = dev_priv->opregion.asle; -	if (asle) { +	if (asle && system_opregion ) {  		/* Don't do this on Medfield or other non PC like devices, they  		   use the bit for something different altogether */  		psb_enable_pipestat(dev_priv, 0, PIPE_LEGACY_BLC_EVENT_ENABLE); @@ -221,7 +223,6 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev)  #define ACPI_EV_LID            (1<<1)  #define ACPI_EV_DOCK           (1<<2) -static struct psb_intel_opregion *system_opregion;  static int psb_intel_opregion_video_event(struct notifier_block *nb,  					  unsigned long val, void *data) @@ -266,9 +267,6 @@ void psb_intel_opregion_init(struct drm_device *dev)  		system_opregion = opregion;  		register_acpi_notifier(&psb_intel_opregion_notifier);  	} - -	if (opregion->asle) -		psb_intel_opregion_enable_asle(dev);  }  void psb_intel_opregion_fini(struct drm_device *dev) diff --git a/drivers/gpu/drm/gma500/opregion.h b/drivers/gpu/drm/gma500/opregion.h index 72dc6b92126..4a90f8b0e16 100644 --- a/drivers/gpu/drm/gma500/opregion.h +++ b/drivers/gpu/drm/gma500/opregion.h @@ -27,6 +27,7 @@ extern void psb_intel_opregion_asle_intr(struct drm_device *dev);  extern void psb_intel_opregion_init(struct drm_device *dev);  extern void psb_intel_opregion_fini(struct drm_device *dev);  extern int psb_intel_opregion_setup(struct drm_device *dev); +extern void psb_intel_opregion_enable_asle(struct drm_device *dev);  #else @@ -46,4 +47,8 @@ extern inline int psb_intel_opregion_setup(struct drm_device *dev)  {  	return 0;  } + +extern inline void psb_intel_opregion_enable_asle(struct drm_device *dev) +{ +}  #endif diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c index eff039bf92d..5971bc82b76 100644 --- a/drivers/gpu/drm/gma500/psb_device.c +++ b/drivers/gpu/drm/gma500/psb_device.c @@ -144,6 +144,10 @@ static int psb_backlight_init(struct drm_device *dev)  	psb_backlight_device->props.max_brightness = 100;  	backlight_update_status(psb_backlight_device);  	dev_priv->backlight_device = psb_backlight_device; + +	/* This must occur after the backlight is properly initialised */ +	psb_lid_timer_init(dev_priv); +  	return 0;  } @@ -354,13 +358,6 @@ static int psb_chip_setup(struct drm_device *dev)  	return 0;  } -/* Not exactly an erratum more an irritation */ -static void psb_chip_errata(struct drm_device *dev) -{ -	struct drm_psb_private *dev_priv = dev->dev_private; -	psb_lid_timer_init(dev_priv); -} -  static void psb_chip_teardown(struct drm_device *dev)  {  	struct drm_psb_private *dev_priv = dev->dev_private; @@ -379,7 +376,6 @@ const struct psb_ops psb_chip_ops = {  	.sgx_offset = PSB_SGX_OFFSET,  	.chip_setup = psb_chip_setup,  	.chip_teardown = psb_chip_teardown, -	.errata = psb_chip_errata,  	.crtc_helper = &psb_intel_helper_funcs,  	.crtc_funcs = &psb_intel_crtc_funcs, diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index caba6e08693..a8858a907f4 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -374,6 +374,7 @@ 
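/*
 * A condensed sketch of the gma500 backlight rework in the hunks above and
 * just below, with hypothetical variable names: cdv brightness is
 * normalized to a 0..100 scale, matching psb_backlight_init(), which pins
 * props.max_brightness at 100.  cdv_get_brightness() converts the hardware
 * duty cycle to a percentage, and cdv_set_brightness() scales it back
 * before programming the PWM.  The opregion changes make
 * psb_intel_opregion_enable_asle() a public hook that psb_driver_load()
 * calls only once system_opregion has been set up.
 */
	/* read path: hardware duty cycle -> percent (0..100) */
	int percent = (duty_cycle * 100) / cdv_get_max_backlight(dev);

	/* write path: percent (0..100) -> hardware duty cycle */
	level = level * cdv_get_max_backlight(dev) / 100;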
static int psb_driver_load(struct drm_device *dev, unsigned long chipset)  	if (ret)  		return ret; +	psb_intel_opregion_enable_asle(dev);  #if 0  	/*enable runtime pm at last*/  	pm_runtime_enable(&dev->pdev->dev); diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index bef04c19276..3fda8c87f02 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -386,6 +386,7 @@ config HID_MULTITOUCH  	  - Unitec Panels  	  - XAT optical touch panels  	  - Xiroku optical touch panels +	  - Zytronic touch panels  	  If unsure, say N. diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 875ff451842..32039235cfe 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -659,6 +659,9 @@  #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE	0x0001  #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE	0x0600 +#define USB_VENDOR_ID_SENNHEISER	0x1395 +#define USB_DEVICE_ID_SENNHEISER_BTD500USB	0x002c +  #define USB_VENDOR_ID_SIGMA_MICRO	0x1c4f  #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD	0x0002 @@ -808,6 +811,9 @@  #define USB_VENDOR_ID_ZYDACRON	0x13EC  #define USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL	0x0006 +#define USB_VENDOR_ID_ZYTRONIC		0x14c8 +#define USB_DEVICE_ID_ZYTRONIC_ZXY100	0x0005 +  #define USB_VENDOR_ID_PRIMAX	0x0461  #define USB_DEVICE_ID_PRIMAX_KEYBOARD	0x4e05 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 132b0019365..5301006f6c1 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -301,6 +301,9 @@ static const struct hid_device_id hid_battery_quirks[] = {  	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,  			       USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI),  	  HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, +	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, +		USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), +	  HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE },  	{}  }; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 6e3332a9997..76479246d4e 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1048,6 +1048,11 @@ static const struct hid_device_id mt_devices[] = {  		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,  			USB_DEVICE_ID_XIROKU_CSR2) }, +	/* Zytronic panels */ +	{ .driver_data = MT_CLS_SERIAL, +		MT_USB_DEVICE(USB_VENDOR_ID_ZYTRONIC, +			USB_DEVICE_ID_ZYTRONIC_ZXY100) }, +  	/* Generic MT device */  	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) },  	{ } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0597ee604f6..903eef3d3e1 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -76,6 +76,7 @@ static const struct hid_blacklist {  	{ USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET },  	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET },  	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, +	{ USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET },  	{ USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET },  	{ USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1, HID_QUIRK_NOGET },  	{ USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET }, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5c1bc995e56..f10221f4080 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv 
*priv,  		skb_frag_size_set(frag, size);  		skb->data_len += size; -		skb->truesize += size; +		skb->truesize += PAGE_SIZE;  	} else  		skb_put(skb, length); @@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)  	struct ipoib_dev_priv *priv = netdev_priv(dev);  	struct sk_buff *skb;  	int buf_size; +	int tailroom;  	u64 *mapping; -	if (ipoib_ud_need_sg(priv->max_ib_mtu)) +	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {  		buf_size = IPOIB_UD_HEAD_SIZE; -	else +		tailroom = 128; /* reserve some tailroom for IP/TCP headers */ +	} else {  		buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); +		tailroom = 0; +	} -	skb = dev_alloc_skb(buf_size + 4); +	skb = dev_alloc_skb(buf_size + tailroom + 4);  	if (unlikely(!skb))  		return NULL; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3a74e4410fc..86e2f4a62b9 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -26,6 +26,8 @@   * These routines are used by both DMA-remapping and Interrupt-remapping   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ +  #include <linux/pci.h>  #include <linux/dmar.h>  #include <linux/iova.h> @@ -39,8 +41,6 @@  #include <asm/irq_remapping.h>  #include <asm/iommu_table.h> -#define PREFIX "DMAR: " -  /* No locks are needed as DMA remapping hardware unit   * list is constructed at boot time and hotplug of   * these units are not supported by the architecture. @@ -83,16 +83,12 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,  		 * ignore it  		 */  		if (!bus) { -			printk(KERN_WARNING -			PREFIX "Device scope bus [%d] not found\n", -			scope->bus); +			pr_warn("Device scope bus [%d] not found\n", scope->bus);  			break;  		}  		pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));  		if (!pdev) { -			printk(KERN_WARNING PREFIX -			"Device scope device [%04x:%02x:%02x.%02x] not found\n", -				segment, bus->number, path->dev, path->fn); +			/* warning will be printed below */  			break;  		}  		path ++; @@ -100,9 +96,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,  		bus = pdev->subordinate;  	}  	if (!pdev) { -		printk(KERN_WARNING PREFIX -		"Device scope device [%04x:%02x:%02x.%02x] not found\n", -		segment, scope->bus, path->dev, path->fn); +		pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", +			segment, scope->bus, path->dev, path->fn);  		*dev = NULL;  		return 0;  	} @@ -110,9 +105,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,  			pdev->subordinate) || (scope->entry_type == \  			ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {  		pci_dev_put(pdev); -		printk(KERN_WARNING PREFIX -			"Device scope type does not match for %s\n", -			 pci_name(pdev)); +		pr_warn("Device scope type does not match for %s\n", +			pci_name(pdev));  		return -EINVAL;  	}  	*dev = pdev; @@ -134,8 +128,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,  		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)  			(*cnt)++;  		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) { -			printk(KERN_WARNING PREFIX -			       "Unsupported device scope\n"); +			pr_warn("Unsupported device scope\n");  		}  		start += scope->length;  	} @@ -261,25 +254,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)  	case ACPI_DMAR_TYPE_HARDWARE_UNIT:  		drhd = container_of(header, struct acpi_dmar_hardware_unit,  				    header); -		printk (KERN_INFO PREFIX -			"DRHD base: %#016Lx flags: %#x\n", +		
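/*
 * The dmar.c hunks here replace the hand-rolled "DMAR: " PREFIX with the
 * kernel's pr_fmt mechanism.  A minimal sketch of how the pieces compose
 * (standard printk behavior, not specific to this commit):
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede printk.h */

#include <linux/printk.h>

/* pr_warn(fmt, ...) expands to printk(KERN_WARNING pr_fmt(fmt), ...),
 * so every message gets a uniform module-name prefix ("dmar: " here): */
pr_warn("Device scope bus [%d] not found\n", scope->bus);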
pr_info("DRHD base: %#016Lx flags: %#x\n",  			(unsigned long long)drhd->address, drhd->flags);  		break;  	case ACPI_DMAR_TYPE_RESERVED_MEMORY:  		rmrr = container_of(header, struct acpi_dmar_reserved_memory,  				    header); -		printk (KERN_INFO PREFIX -			"RMRR base: %#016Lx end: %#016Lx\n", +		pr_info("RMRR base: %#016Lx end: %#016Lx\n",  			(unsigned long long)rmrr->base_address,  			(unsigned long long)rmrr->end_address);  		break;  	case ACPI_DMAR_TYPE_ATSR:  		atsr = container_of(header, struct acpi_dmar_atsr, header); -		printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); +		pr_info("ATSR flags: %#x\n", atsr->flags);  		break;  	case ACPI_DMAR_HARDWARE_AFFINITY:  		rhsa = container_of(header, struct acpi_dmar_rhsa, header); -		printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", +		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",  		       (unsigned long long)rhsa->base_address,  		       rhsa->proximity_domain);  		break; @@ -299,7 +290,7 @@ static int __init dmar_table_detect(void)  				&dmar_tbl_size);  	if (ACPI_SUCCESS(status) && !dmar_tbl) { -		printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); +		pr_warn("Unable to map DMAR\n");  		status = AE_NOT_FOUND;  	} @@ -333,20 +324,18 @@ parse_dmar_table(void)  		return -ENODEV;  	if (dmar->width < PAGE_SHIFT - 1) { -		printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); +		pr_warn("Invalid DMAR haw\n");  		return -EINVAL;  	} -	printk (KERN_INFO PREFIX "Host address width %d\n", -		dmar->width + 1); +	pr_info("Host address width %d\n", dmar->width + 1);  	entry_header = (struct acpi_dmar_header *)(dmar + 1);  	while (((unsigned long)entry_header) <  			(((unsigned long)dmar) + dmar_tbl->length)) {  		/* Avoid looping forever on bad ACPI tables */  		if (entry_header->length == 0) { -			printk(KERN_WARNING PREFIX -				"Invalid 0-length structure\n"); +			pr_warn("Invalid 0-length structure\n");  			ret = -EINVAL;  			break;  		} @@ -369,8 +358,7 @@ parse_dmar_table(void)  #endif  			break;  		default: -			printk(KERN_WARNING PREFIX -				"Unknown DMAR structure type %d\n", +			pr_warn("Unknown DMAR structure type %d\n",  				entry_header->type);  			ret = 0; /* for forward compatibility */  			break; @@ -469,12 +457,12 @@ int __init dmar_table_init(void)  	ret = parse_dmar_table();  	if (ret) {  		if (ret != -ENODEV) -			printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); +			pr_info("parse DMAR table failure.\n");  		return ret;  	}  	if (list_empty(&dmar_drhd_units)) { -		printk(KERN_INFO PREFIX "No DMAR devices found\n"); +		pr_info("No DMAR devices found\n");  		return -ENODEV;  	} @@ -506,8 +494,7 @@ int __init check_zero_address(void)  			(((unsigned long)dmar) + dmar_tbl->length)) {  		/* Avoid looping forever on bad ACPI tables */  		if (entry_header->length == 0) { -			printk(KERN_WARNING PREFIX -				"Invalid 0-length structure\n"); +			pr_warn("Invalid 0-length structure\n");  			return 0;  		} @@ -558,8 +545,7 @@ int __init detect_intel_iommu(void)  		if (ret && irq_remapping_enabled && cpu_has_x2apic &&  		    dmar->flags & 0x1) -			printk(KERN_INFO -			       "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); +			pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");  		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {  			iommu_detected = 1; @@ -579,14 +565,89 @@ int __init detect_intel_iommu(void)  } +static void unmap_iommu(struct intel_iommu *iommu) +{ +	iounmap(iommu->reg); +	release_mem_region(iommu->reg_phys, 
iommu->reg_size); +} + +/** + * map_iommu: map the iommu's registers + * @iommu: the iommu to map + * @phys_addr: the physical address of the base register + * + * Memory map the iommu's registers.  Start w/ a single page, and + * possibly expand if that turns out to be insufficient. + */ +static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +{ +	int map_size, err = 0; + +	iommu->reg_phys = phys_addr; +	iommu->reg_size = VTD_PAGE_SIZE; + +	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { +		pr_err("IOMMU: can't reserve memory\n"); +		err = -EBUSY; +		goto out; +	} + +	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); +	if (!iommu->reg) { +		pr_err("IOMMU: can't map the region\n"); +		err = -ENOMEM; +		goto release; +	} + +	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); +	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + +	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { +		err = -EINVAL; +		warn_invalid_dmar(phys_addr, " returns all ones"); +		goto unmap; +	} + +	/* the registers might be more than one page */ +	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), +			 cap_max_fault_reg_offset(iommu->cap)); +	map_size = VTD_PAGE_ALIGN(map_size); +	if (map_size > iommu->reg_size) { +		iounmap(iommu->reg); +		release_mem_region(iommu->reg_phys, iommu->reg_size); +		iommu->reg_size = map_size; +		if (!request_mem_region(iommu->reg_phys, iommu->reg_size, +					iommu->name)) { +			pr_err("IOMMU: can't reserve memory\n"); +			err = -EBUSY; +			goto out; +		} +		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); +		if (!iommu->reg) { +			pr_err("IOMMU: can't map the region\n"); +			err = -ENOMEM; +			goto release; +		} +	} +	err = 0; +	goto out; + +unmap: +	iounmap(iommu->reg); +release: +	release_mem_region(iommu->reg_phys, iommu->reg_size); +out: +	return err; +} +  int alloc_iommu(struct dmar_drhd_unit *drhd)  {  	struct intel_iommu *iommu; -	int map_size;  	u32 ver;  	static int iommu_allocated = 0;  	int agaw = 0;  	int msagaw = 0; +	int err;  	if (!drhd->reg_base_addr) {  		warn_invalid_dmar(0, ""); @@ -600,30 +661,22 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)  	iommu->seq_id = iommu_allocated++;  	sprintf (iommu->name, "dmar%d", iommu->seq_id); -	iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); -	if (!iommu->reg) { -		printk(KERN_ERR "IOMMU: can't map the region\n"); +	err = map_iommu(iommu, drhd->reg_base_addr); +	if (err) { +		pr_err("IOMMU: failed to map %s\n", iommu->name);  		goto error;  	} -	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); -	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - -	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { -		warn_invalid_dmar(drhd->reg_base_addr, " returns all ones"); -		goto err_unmap; -	} +	err = -EINVAL;  	agaw = iommu_calculate_agaw(iommu);  	if (agaw < 0) { -		printk(KERN_ERR -		       "Cannot get a valid agaw for iommu (seq_id = %d)\n", -		       iommu->seq_id); +		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", +			iommu->seq_id);  		goto err_unmap;  	}  	msagaw = iommu_calculate_max_sagaw(iommu);  	if (msagaw < 0) { -		printk(KERN_ERR -			"Cannot get a valid max agaw for iommu (seq_id = %d)\n", +		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",  			iommu->seq_id);  		goto err_unmap;  	} @@ -632,19 +685,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)  	iommu->node = -1; -	/* the registers might be more than one page */ -	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), -		
cap_max_fault_reg_offset(iommu->cap)); -	map_size = VTD_PAGE_ALIGN(map_size); -	if (map_size > VTD_PAGE_SIZE) { -		iounmap(iommu->reg); -		iommu->reg = ioremap(drhd->reg_base_addr, map_size); -		if (!iommu->reg) { -			printk(KERN_ERR "IOMMU: can't map the region\n"); -			goto error; -		} -	} -  	ver = readl(iommu->reg + DMAR_VER_REG);  	pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",  		iommu->seq_id, @@ -659,10 +699,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)  	return 0;   err_unmap: -	iounmap(iommu->reg); +	unmap_iommu(iommu);   error:  	kfree(iommu); -	return -1; +	return err;  }  void free_iommu(struct intel_iommu *iommu) @@ -673,7 +713,8 @@ void free_iommu(struct intel_iommu *iommu)  	free_dmar_iommu(iommu);  	if (iommu->reg) -		iounmap(iommu->reg); +		unmap_iommu(iommu); +  	kfree(iommu);  } @@ -710,7 +751,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)  	if (fault & DMA_FSTS_IQE) {  		head = readl(iommu->reg + DMAR_IQH_REG);  		if ((head >> DMAR_IQ_SHIFT) == index) { -			printk(KERN_ERR "VT-d detected invalid descriptor: " +			pr_err("VT-d detected invalid descriptor: "  				"low=%llx, high=%llx\n",  				(unsigned long long)qi->desc[index].low,  				(unsigned long long)qi->desc[index].high); @@ -1129,15 +1170,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,  	reason = dmar_get_fault_reason(fault_reason, &fault_type);  	if (fault_type == INTR_REMAP) -		printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " +		pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "  		       "fault index %llx\n"  			"INTR-REMAP:[fault reason %02d] %s\n",  			(source_id >> 8), PCI_SLOT(source_id & 0xFF),  			PCI_FUNC(source_id & 0xFF), addr >> 48,  			fault_reason, reason);  	else -		printk(KERN_ERR -		       "DMAR:[%s] Request device [%02x:%02x.%d] " +		pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "  		       "fault addr %llx \n"  		       "DMAR:[fault reason %02d] %s\n",  		       (type ? 
"DMA Read" : "DMA Write"), @@ -1157,8 +1197,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)  	raw_spin_lock_irqsave(&iommu->register_lock, flag);  	fault_status = readl(iommu->reg + DMAR_FSTS_REG);  	if (fault_status) -		printk(KERN_ERR "DRHD: handling fault status reg %x\n", -		       fault_status); +		pr_err("DRHD: handling fault status reg %x\n", fault_status);  	/* TBD: ignore advanced fault log currently */  	if (!(fault_status & DMA_FSTS_PPF)) @@ -1224,7 +1263,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)  	irq = create_irq();  	if (!irq) { -		printk(KERN_ERR "IOMMU: no free vectors\n"); +		pr_err("IOMMU: no free vectors\n");  		return -EINVAL;  	} @@ -1241,7 +1280,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)  	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);  	if (ret) -		printk(KERN_ERR "IOMMU: can't request irq\n"); +		pr_err("IOMMU: can't request irq\n");  	return ret;  } @@ -1258,8 +1297,7 @@ int __init enable_drhd_fault_handling(void)  		ret = dmar_set_interrupt(iommu);  		if (ret) { -			printk(KERN_ERR "DRHD %Lx: failed to enable fault, " -			       " interrupt, ret %d\n", +			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",  			       (unsigned long long)drhd->reg_base_addr, ret);  			return -1;  		} diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 6d347064b8b..e0b18f3ae9a 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -902,7 +902,6 @@ static int intel_setup_ioapic_entry(int irq,  	return 0;  } -#ifdef CONFIG_SMP  /*   * Migrate the IO-APIC irq in the presence of intr-remapping.   * @@ -924,6 +923,10 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,  	struct irq_cfg *cfg = data->chip_data;  	unsigned int dest, irq = data->irq;  	struct irte irte; +	int err; + +	if (!config_enabled(CONFIG_SMP)) +		return -EINVAL;  	if (!cpumask_intersects(mask, cpu_online_mask))  		return -EINVAL; @@ -931,10 +934,16 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,  	if (get_irte(irq, &irte))  		return -EBUSY; -	if (assign_irq_vector(irq, cfg, mask)) -		return -EBUSY; +	err = assign_irq_vector(irq, cfg, mask); +	if (err) +		return err; -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); +	err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); +	if (err) { +		if (assign_irq_vector(irq, cfg, data->affinity)) +			pr_err("Failed to recover vector for irq %d\n", irq); +		return err; +	}  	irte.vector = cfg->vector;  	irte.dest_id = IRTE_DEST(dest); @@ -956,7 +965,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,  	cpumask_copy(data->affinity, mask);  	return 0;  } -#endif  static void intel_compose_msi_msg(struct pci_dev *pdev,  				  unsigned int irq, unsigned int dest, @@ -1058,9 +1066,7 @@ struct irq_remap_ops intel_irq_remap_ops = {  	.reenable		= reenable_irq_remapping,  	.enable_faulting	= enable_drhd_fault_handling,  	.setup_ioapic_entry	= intel_setup_ioapic_entry, -#ifdef CONFIG_SMP  	.set_affinity		= intel_ioapic_set_affinity, -#endif  	.free_irq		= free_irte,  	.compose_msi_msg	= intel_compose_msi_msg,  	.msi_alloc_irq		= intel_msi_alloc_irq, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 40cda8e98d8..1d29b1c66e7 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -111,16 +111,15 @@ int setup_ioapic_remapped_entry(int irq,  					     vector, attr);  } -#ifdef CONFIG_SMP  int 
set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask,  			      bool force)  { -	if (!remap_ops || !remap_ops->set_affinity) +	if (!config_enabled(CONFIG_SMP) || !remap_ops || +	    !remap_ops->set_affinity)  		return 0;  	return remap_ops->set_affinity(data, mask, force);  } -#endif  void free_remapped_irq(int irq)  { diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index be9d72950c5..b12974cc1df 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -59,11 +59,9 @@ struct irq_remap_ops {  				  unsigned int, int,  				  struct io_apic_irq_attr *); -#ifdef CONFIG_SMP  	/* Set the CPU affinity of a remapped interrupt */  	int (*set_affinity)(struct irq_data *data, const struct cpumask *mask,  			    bool force); -#endif  	/* Free an IRQ */  	int (*free_irq)(int); diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 1a0ae4445ff..5f21f629b7a 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -135,8 +135,8 @@ send_layer2(struct mISDNstack *st, struct sk_buff *skb)  			skb = NULL;  		else if (*debug & DEBUG_SEND_ERR)  			printk(KERN_DEBUG -			       "%s ch%d mgr prim(%x) addr(%x) err %d\n", -			       __func__, ch->nr, hh->prim, ch->addr, ret); +			       "%s mgr prim(%x) err %d\n", +			       __func__, hh->prim, ret);  	}  out:  	mutex_unlock(&st->lmutex); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d039de8322f..b58b7a33914 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)  	ti->split_io = dm_rh_get_region_size(ms->rh);  	ti->num_flush_requests = 1;  	ti->num_discard_requests = 1; +	ti->discard_zeroes_data_unsupported = 1;  	ms->kmirrord_wq = alloc_workqueue("kmirrord",  					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1214,7 +1215,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,  	 * We need to dec pending if this was a write.  	 */  	if (rw == WRITE) { -		if (!(bio->bi_rw & REQ_FLUSH)) +		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))  			dm_rh_dec(ms->rh, map_context->ll);  		return error;  	} diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 7771ed21218..69732e03eb3 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)  		return;  	} +	if (bio->bi_rw & REQ_DISCARD) +		return; +  	/* We must inform the log that the sync count has changed. 
*/  	log->type->set_region_sync(log, region, 0); @@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)  	struct bio *bio;  	for (bio = bios->head; bio; bio = bio->bi_next) { -		if (bio->bi_rw & REQ_FLUSH) +		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))  			continue;  		rh_inc(rh, dm_rh_bio_to_region(rh, bio));  	} diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ce59824fb41..68694da0d21 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio)  			cell_release_singleton(cell, bio);  			cell_release_singleton(cell2, bio); -			remap_and_issue(tc, bio, lookup_result.block); +			if ((!lookup_result.shared) && pool->pf.discard_passdown) +				remap_and_issue(tc, bio, lookup_result.block); +			else +				bio_endio(bio, 0);  		}  		break; @@ -2628,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)  	if (tc->pool->pf.discard_enabled) {  		ti->discards_supported = 1;  		ti->num_discard_requests = 1; +		ti->discard_zeroes_data_unsupported = 1;  	}  	dm_put(pool_md); diff --git a/drivers/md/md.c b/drivers/md/md.c index a4c219e3c85..d5ab4493c8b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)  		 * can be sane */  		return -EBUSY;  	rdev->data_offset = offset; +	rdev->new_data_offset = offset;  	return len;  } @@ -3926,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page)  	return sprintf(page, "%s\n", array_states[st]);  } -static int do_md_stop(struct mddev * mddev, int ro, int is_open); -static int md_set_readonly(struct mddev * mddev, int is_open); +static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev); +static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);  static int do_md_run(struct mddev * mddev);  static int restart_array(struct mddev *mddev); @@ -3943,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)  		/* stopping an active array */  		if (atomic_read(&mddev->openers) > 0)  			return -EBUSY; -		err = do_md_stop(mddev, 0, 0); +		err = do_md_stop(mddev, 0, NULL);  		break;  	case inactive:  		/* stopping an active array */  		if (mddev->pers) {  			if (atomic_read(&mddev->openers) > 0)  				return -EBUSY; -			err = do_md_stop(mddev, 2, 0); +			err = do_md_stop(mddev, 2, NULL);  		} else  			err = 0; /* already inactive */  		break; @@ -3958,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)  		break; /* not supported yet */  	case readonly:  		if (mddev->pers) -			err = md_set_readonly(mddev, 0); +			err = md_set_readonly(mddev, NULL);  		else {  			mddev->ro = 1;  			set_disk_ro(mddev->gendisk, 1); @@ -3968,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)  	case read_auto:  		if (mddev->pers) {  			if (mddev->ro == 0) -				err = md_set_readonly(mddev, 0); +				err = md_set_readonly(mddev, NULL);  			else if (mddev->ro == 1)  				err = restart_array(mddev);  			if (err == 0) { @@ -5351,15 +5352,17 @@ void md_stop(struct mddev *mddev)  }  EXPORT_SYMBOL_GPL(md_stop); -static int md_set_readonly(struct mddev *mddev, int is_open) +static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)  {  	int err = 0;  	mutex_lock(&mddev->open_mutex); -	if (atomic_read(&mddev->openers) > is_open) { +	if (atomic_read(&mddev->openers) > !!bdev) {  		printk("md: %s still in 
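/*
 * Condensed from the md hunks here (continuing below): do_md_stop() and
 * md_set_readonly() now take the ioctl caller's block_device instead of an
 * "is_open" count.  The "!!bdev" comparison tolerates exactly the caller's
 * own open reference, and the explicit sync_blockdev() flushes writes that
 * arrived through other opens closed before ->open_mutex was taken:
 */
	mutex_lock(&mddev->open_mutex);
	if (atomic_read(&mddev->openers) > !!bdev) {
		/* someone other than the stopping caller still has it open */
		err = -EBUSY;
		goto out;
	}
	if (bdev)
		sync_blockdev(bdev);	/* flush I/O from earlier, closed opens */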
use.\n",mdname(mddev));  		err = -EBUSY;  		goto out;  	} +	if (bdev) +		sync_blockdev(bdev);  	if (mddev->pers) {  		__md_stop_writes(mddev); @@ -5381,18 +5384,26 @@ out:   *   0 - completely stop and dis-assemble array   *   2 - stop but do not disassemble array   */ -static int do_md_stop(struct mddev * mddev, int mode, int is_open) +static int do_md_stop(struct mddev * mddev, int mode, +		      struct block_device *bdev)  {  	struct gendisk *disk = mddev->gendisk;  	struct md_rdev *rdev;  	mutex_lock(&mddev->open_mutex); -	if (atomic_read(&mddev->openers) > is_open || +	if (atomic_read(&mddev->openers) > !!bdev ||  	    mddev->sysfs_active) {  		printk("md: %s still in use.\n",mdname(mddev));  		mutex_unlock(&mddev->open_mutex);  		return -EBUSY;  	} +	if (bdev) +		/* It is possible IO was issued on some other +		 * open file which was closed before we took ->open_mutex. +		 * As that was not the last close __blkdev_put will not +		 * have called sync_blockdev, so we must. +		 */ +		sync_blockdev(bdev);  	if (mddev->pers) {  		if (mddev->ro) @@ -5466,7 +5477,7 @@ static void autorun_array(struct mddev *mddev)  	err = do_md_run(mddev);  	if (err) {  		printk(KERN_WARNING "md: do_md_run() returned %d\n", err); -		do_md_stop(mddev, 0, 0); +		do_md_stop(mddev, 0, NULL);  	}  } @@ -6481,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,  			goto done_unlock;  		case STOP_ARRAY: -			err = do_md_stop(mddev, 0, 1); +			err = do_md_stop(mddev, 0, bdev);  			goto done_unlock;  		case STOP_ARRAY_RO: -			err = md_set_readonly(mddev, 1); +			err = md_set_readonly(mddev, bdev);  			goto done_unlock;  		case BLKROSET: diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 240ff312504..cacd008d686 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1818,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)  	if (atomic_dec_and_test(&r1_bio->remaining)) {  		/* if we're here, all write(s) have completed, so clean up */ -		md_done_sync(mddev, r1_bio->sectors, 1); -		put_buf(r1_bio); +		int s = r1_bio->sectors; +		if (test_bit(R1BIO_MadeGood, &r1_bio->state) || +		    test_bit(R1BIO_WriteError, &r1_bio->state)) +			reschedule_retry(r1_bio); +		else { +			put_buf(r1_bio); +			md_done_sync(mddev, s, 1); +		}  	}  } diff --git a/drivers/media/video/cx25821/cx25821-core.c b/drivers/media/video/cx25821/cx25821-core.c index 83c1aa6b2e6..f11f6f07e91 100644 --- a/drivers/media/video/cx25821/cx25821-core.c +++ b/drivers/media/video/cx25821/cx25821-core.c @@ -904,9 +904,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev)  	list_add_tail(&dev->devlist, &cx25821_devlist);  	mutex_unlock(&cx25821_devlist_mutex); -	strcpy(cx25821_boards[UNKNOWN_BOARD].name, "unknown"); -	strcpy(cx25821_boards[CX25821_BOARD].name, "cx25821"); -  	if (dev->pci->device != 0x8210) {  		pr_info("%s(): Exiting. 
Incorrect Hardware device = 0x%02x\n",  			__func__, dev->pci->device); diff --git a/drivers/media/video/cx25821/cx25821.h b/drivers/media/video/cx25821/cx25821.h index b9aa801b00a..029f2934a6d 100644 --- a/drivers/media/video/cx25821/cx25821.h +++ b/drivers/media/video/cx25821/cx25821.h @@ -187,7 +187,7 @@ enum port {  };  struct cx25821_board { -	char *name; +	const char *name;  	enum port porta;  	enum port portb;  	enum port portc; diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 83dbb2ddff1..0cbada18f6f 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -681,6 +681,7 @@ static void determine_valid_ioctls(struct video_device *vdev)  	SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings);  	SET_VALID_IOCTL(ops, VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings);  	SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings); +	SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap);  	/* yes, really vidioc_subscribe_event */  	SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event);  	SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 3680aa251de..2cf084eb9d5 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -6,7 +6,7 @@  #include "bonding.h"  #include "bond_alb.h" -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS)  #include <linux/debugfs.h>  #include <linux/seq_file.h> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b9c2ae62166..2ee76993f05 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3227,6 +3227,12 @@ static int bond_master_netdev_event(unsigned long event,  	switch (event) {  	case NETDEV_CHANGENAME:  		return bond_event_changename(event_bond); +	case NETDEV_UNREGISTER: +		bond_remove_proc_entry(event_bond); +		break; +	case NETDEV_REGISTER: +		bond_create_proc_entry(event_bond); +		break;  	default:  		break;  	} @@ -4411,8 +4417,6 @@ static void bond_uninit(struct net_device *bond_dev)  	bond_work_cancel_all(bond); -	bond_remove_proc_entry(bond); -  	bond_debug_unregister(bond);  	__hw_addr_flush(&bond->mc_list); @@ -4814,7 +4818,6 @@ static int bond_init(struct net_device *bond_dev)  	bond_set_lockdep_class(bond_dev); -	bond_create_proc_entry(bond);  	list_add_tail(&bond->bond_list, &bn->dev_list);  	bond_prepare_sysfs_group(bond); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 9cc15701101..1f78b63d5ef 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -261,7 +261,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter)  	if ((phy_data & BMSR_LSTATUS) == 0) {  		/* link down */  		netif_carrier_off(netdev); -		netif_stop_queue(netdev);  		hw->hibernate = true;  		if (atl1c_reset_mac(hw) != 0)  			if (netif_msg_hw(adapter)) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 46b8b7d8163..d09c6b583d1 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -656,7 +656,7 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)  			dma_unmap_single(bp->sdev->dma_dev, mapping,  					     RX_PKT_BUF_SZ, DMA_FROM_DEVICE);  		dev_kfree_skb_any(skb); -		skb = 
__netdev_alloc_skb(bp->dev, RX_PKT_BUF_SZ, GFP_ATOMIC|GFP_DMA); +		skb = alloc_skb(RX_PKT_BUF_SZ, GFP_ATOMIC | GFP_DMA);  		if (skb == NULL)  			return -ENOMEM;  		mapping = dma_map_single(bp->sdev->dma_dev, skb->data, @@ -967,7 +967,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev)  			dma_unmap_single(bp->sdev->dma_dev, mapping, len,  					     DMA_TO_DEVICE); -		bounce_skb = __netdev_alloc_skb(dev, len, GFP_ATOMIC | GFP_DMA); +		bounce_skb = alloc_skb(len, GFP_ATOMIC | GFP_DMA);  		if (!bounce_skb)  			goto err_out; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index ac7b7448853..1fa4927a45b 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -5372,7 +5372,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)  			int k, last;  			if (skb == NULL) { -				j++; +				j = NEXT_TX_BD(j);  				continue;  			} @@ -5384,8 +5384,8 @@ bnx2_free_tx_skbs(struct bnx2 *bp)  			tx_buf->skb = NULL;  			last = tx_buf->nr_frags; -			j++; -			for (k = 0; k < last; k++, j++) { +			j = NEXT_TX_BD(j); +			for (k = 0; k < last; k++, j = NEXT_TX_BD(j)) {  				tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)];  				dma_unmap_page(&bp->pdev->dev,  					dma_unmap_addr(tx_buf, mapping), diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index c95e7b5e2b8..2c89d17cbb2 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -534,7 +534,8 @@ int cnic_unregister_driver(int ulp_type)  	}  	if (atomic_read(&ulp_ops->ref_count) != 0) -		netdev_warn(dev->netdev, "Failed waiting for ref count to go to zero\n"); +		pr_warn("%s: Failed waiting for ref count to go to zero\n", +			__func__);  	return 0;  out_unlock: @@ -1053,12 +1054,13 @@ static int cnic_init_uio(struct cnic_dev *dev)  	uinfo = &udev->cnic_uinfo; -	uinfo->mem[0].addr = dev->netdev->base_addr; +	uinfo->mem[0].addr = pci_resource_start(dev->pcidev, 0);  	uinfo->mem[0].internal_addr = dev->regview; -	uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start;  	uinfo->mem[0].memtype = UIO_MEM_PHYS;  	if (test_bit(CNIC_F_BNX2_CLASS, &dev->flags)) { +		uinfo->mem[0].size = MB_GET_CID_ADDR(TX_TSS_CID + +						     TX_MAX_TSS_RINGS + 1);  		uinfo->mem[1].addr = (unsigned long) cp->status_blk.gen &  					PAGE_MASK;  		if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) @@ -1068,6 +1070,8 @@ static int cnic_init_uio(struct cnic_dev *dev)  		uinfo->name = "bnx2_cnic";  	} else if (test_bit(CNIC_F_BNX2X_CLASS, &dev->flags)) { +		uinfo->mem[0].size = pci_resource_len(dev->pcidev, 0); +  		uinfo->mem[1].addr = (unsigned long) cp->bnx2x_def_status_blk &  			PAGE_MASK;  		uinfo->mem[1].size = sizeof(*cp->bnx2x_def_status_blk); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index f2db8fca46a..ab1d80ff079 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2063,10 +2063,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)  			return NETDEV_TX_OK;  		} -		/* Steal sock reference for processing TX time stamps */ -		swap(skb_new->sk, skb->sk); -		swap(skb_new->destructor, skb->destructor); -		kfree_skb(skb); +		if (skb->sk) +			skb_set_owner_w(skb_new, skb->sk); +		consume_skb(skb);  		skb = skb_new;  	} diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 36db4df09ae..1f063dcd8f8 100644 --- 
a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1572,6 +1572,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)  	ctrl = er32(CTRL);  	status = er32(STATUS);  	rxcw = er32(RXCW); +	/* SYNCH bit and IV bit are sticky */ +	udelay(10); +	rxcw = er32(RXCW);  	if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 238ab2f8a5e..e3a7b07df62 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -325,24 +325,46 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)   **/  static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)  { -	u16 phy_reg; -	u32 phy_id; +	u16 phy_reg = 0; +	u32 phy_id = 0; +	s32 ret_val; +	u16 retry_count; + +	for (retry_count = 0; retry_count < 2; retry_count++) { +		ret_val = e1e_rphy_locked(hw, PHY_ID1, &phy_reg); +		if (ret_val || (phy_reg == 0xFFFF)) +			continue; +		phy_id = (u32)(phy_reg << 16); -	e1e_rphy_locked(hw, PHY_ID1, &phy_reg); -	phy_id = (u32)(phy_reg << 16); -	e1e_rphy_locked(hw, PHY_ID2, &phy_reg); -	phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); +		ret_val = e1e_rphy_locked(hw, PHY_ID2, &phy_reg); +		if (ret_val || (phy_reg == 0xFFFF)) { +			phy_id = 0; +			continue; +		} +		phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); +		break; +	}  	if (hw->phy.id) {  		if (hw->phy.id == phy_id)  			return true; -	} else { -		if ((phy_id != 0) && (phy_id != PHY_REVISION_MASK)) -			hw->phy.id = phy_id; +	} else if (phy_id) { +		hw->phy.id = phy_id; +		hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK);  		return true;  	} -	return false; +	/* +	 * In case the PHY needs to be in mdio slow mode, +	 * set slow mode and try to get the PHY id again. 
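The ich8lan retry loop above reads PHY_ID1/PHY_ID2 up to twice, treating 0xFFFF as a failed read, and only the comment here introduces the fallback that follows: drop the MDIO bus into slow mode and re-probe once. A minimal sketch of that probe-then-degrade shape, where read_id() and set_slow_mode() are invented stand-ins for the real MDIO accessors:

#include <linux/types.h>

#define ID_READ_FAILED	0xFFFFu

/* Stand-in accessors: read_id() and set_slow_mode() are hypothetical. */
static bool probe_phy_id(u32 *id,
			 int (*read_id)(u16 *out),
			 int (*set_slow_mode)(void))
{
	u16 reg = 0;
	int attempt;

	for (attempt = 0; attempt < 2; attempt++) {
		if (!read_id(&reg) && reg != ID_READ_FAILED) {
			*id = (u32)reg << 16;
			return true;	/* plausible ID on the fast path */
		}
	}
	/* Both fast-mode reads failed: degrade the bus, try once more. */
	if (!set_slow_mode() && !read_id(&reg) && reg != ID_READ_FAILED) {
		*id = (u32)reg << 16;
		return true;
	}
	return false;
}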
+	 */ +	hw->phy.ops.release(hw); +	ret_val = e1000_set_mdio_slow_mode_hv(hw); +	if (!ret_val) +		ret_val = e1000e_get_phy_id(hw); +	hw->phy.ops.acquire(hw); + +	return !ret_val;  }  /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 18ca3bcadf0..e242104ab47 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6647,6 +6647,11 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)  		return -EINVAL;  	} +	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { +		e_err(drv, "Enable failed, SR-IOV enabled\n"); +		return -EINVAL; +	} +  	/* Hardware supports up to 8 traffic classes */  	if (tc > adapter->dcb_cfg.num_tcs.pg_tcs ||  	    (hw->mac.type == ixgbe_mac_82598EB && diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index f69ec4288b1..41e32257a4e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -201,6 +201,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_adapter *adapter,  	unsigned int i, eop, count = 0;  	unsigned int total_bytes = 0, total_packets = 0; +	if (test_bit(__IXGBEVF_DOWN, &adapter->state)) +		return true; +  	i = tx_ring->next_to_clean;  	eop = tx_ring->tx_buffer_info[i].next_to_watch;  	eop_desc = IXGBE_TX_DESC_ADV(*tx_ring, eop); @@ -969,8 +972,6 @@ static irqreturn_t ixgbevf_msix_clean_tx(int irq, void *data)  	r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues);  	for (i = 0; i < q_vector->txr_count; i++) {  		tx_ring = &(adapter->tx_ring[r_idx]); -		tx_ring->total_bytes = 0; -		tx_ring->total_packets = 0;  		ixgbevf_clean_tx_irq(adapter, tx_ring);  		r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues,  				      r_idx + 1); @@ -994,16 +995,6 @@ static irqreturn_t ixgbevf_msix_clean_rx(int irq, void *data)  	struct ixgbe_hw *hw = &adapter->hw;  	struct ixgbevf_ring  *rx_ring;  	int r_idx; -	int i; - -	r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); -	for (i = 0; i < q_vector->rxr_count; i++) { -		rx_ring = &(adapter->rx_ring[r_idx]); -		rx_ring->total_bytes = 0; -		rx_ring->total_packets = 0; -		r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues, -				      r_idx + 1); -	}  	if (!q_vector->rxr_count)  		return IRQ_HANDLED; diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index fb8377da168..4b785e10f2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)  		desc->des3 = desc->des2 + BUF_SIZE_4KiB;  		priv->hw->desc->prepare_tx_desc(desc, 1, bmax,  						csum); - +		wmb();  		entry = (++priv->cur_tx) % txsize;  		desc = priv->dma_tx + entry; @@ -59,6 +59,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)  					    len, DMA_TO_DEVICE);  		desc->des3 = desc->des2 + BUF_SIZE_4KiB;  		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum); +		wmb();  		priv->hw->desc->set_tx_owner(desc);  		priv->tx_skbuff[entry] = NULL;  	} else { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 51b3b68528e..ea3003edde1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1212,6 +1212,7 @@ static 
netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)  		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion);  		wmb();  		priv->hw->desc->set_tx_owner(desc); +		wmb();  	}  	/* Interrupt on completition only for the latest segment */ @@ -1227,6 +1228,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)  	/* To avoid raise condition */  	priv->hw->desc->set_tx_owner(first); +	wmb();  	priv->cur_tx++; @@ -1290,6 +1292,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)  		}  		wmb();  		priv->hw->desc->set_rx_owner(p + entry); +		wmb();  	}  } diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 39ea0674dcd..5c120189ec8 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -46,7 +46,13 @@ static int mdio_mux_read(struct mii_bus *bus, int phy_id, int regnum)  	struct mdio_mux_parent_bus *pb = cb->parent;  	int r; -	mutex_lock(&pb->mii_bus->mdio_lock); +	/* In theory multiple mdio_mux could be stacked, thus creating +	 * more than a single level of nesting.  But in practice, +	 * SINGLE_DEPTH_NESTING will cover the vast majority of use +	 * cases.  We use it, instead of trying to handle the general +	 * case. +	 */ +	mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING);  	r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);  	if (r)  		goto out; @@ -71,7 +77,7 @@ static int mdio_mux_write(struct mii_bus *bus, int phy_id,  	int r; -	mutex_lock(&pb->mii_bus->mdio_lock); +	mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING);  	r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);  	if (r)  		goto out; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b01960fcfbc..a051cedd64b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -346,6 +346,15 @@ static const struct driver_info	qmi_wwan_force_int1 = {  	.data		= BIT(1), /* interface whitelist bitmap */  }; +static const struct driver_info qmi_wwan_force_int2 = { +	.description	= "Qualcomm WWAN/QMI device", +	.flags		= FLAG_WWAN, +	.bind		= qmi_wwan_bind_shared, +	.unbind		= qmi_wwan_unbind_shared, +	.manage_power	= qmi_wwan_manage_power, +	.data		= BIT(2), /* interface whitelist bitmap */ +}; +  static const struct driver_info	qmi_wwan_force_int3 = {  	.description	= "Qualcomm WWAN/QMI device",  	.flags		= FLAG_WWAN, @@ -498,6 +507,15 @@ static const struct usb_device_id products[] = {  		.bInterfaceProtocol = 0xff,  		.driver_info        = (unsigned long)&qmi_wwan_force_int4,  	}, +	{	/* ZTE MF60 */ +		.match_flags	    = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, +		.idVendor           = 0x19d2, +		.idProduct          = 0x1402, +		.bInterfaceClass    = 0xff, +		.bInterfaceSubClass = 0xff, +		.bInterfaceProtocol = 0xff, +		.driver_info        = (unsigned long)&qmi_wwan_force_int2, +	},  	{	/* Sierra Wireless MC77xx in QMI mode */  		.match_flags	    = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO,  		.idVendor           = 0x1199, diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index f1f8bd09bd8..c8baf020c20 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -1072,7 +1072,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,  	meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);  	/* create a bounce buffer in zone_dma on mapping failure. 
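The same bounce-buffer idea appears in the b44 hunks earlier and in the b43legacy hunk here: when the original skb's data cannot be DMA-mapped, a fresh alloc_skb() copy is taken from ZONE_DMA and mapped instead. A hedged kernel-style sketch of that fallback (make_dma_bounce() is an invented helper):

#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <linux/string.h>

/* Invented helper: copy an unmappable skb into a GFP_DMA bounce skb. */
static struct sk_buff *make_dma_bounce(struct sk_buff *skb)
{
	struct sk_buff *bounce = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);

	if (!bounce)
		return NULL;
	memcpy(skb_put(bounce, skb->len), skb->data, skb->len);
	dev_kfree_skb_any(skb);		/* the original can never be mapped */
	return bounce;
}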
*/  	if (b43legacy_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) { -		bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); +		bounce_skb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);  		if (!bounce_skb) {  			ring->current_slot = old_top_slot;  			ring->used_slots = old_used_slots; diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 509301a5e7e..ff5d689e13f 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -3405,7 +3405,7 @@ il4965_remove_dynamic_key(struct il_priv *il,  		return 0;  	} -	if (il->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) { +	if (il->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) {  		IL_WARN("Removing wrong key %d 0x%x\n", keyconf->keyidx,  			key_flags);  		spin_unlock_irqrestore(&il->sta_lock, flags); @@ -3420,7 +3420,7 @@ il4965_remove_dynamic_key(struct il_priv *il,  	memset(&il->stations[sta_id].sta.key, 0, sizeof(struct il4965_keyinfo));  	il->stations[sta_id].sta.key.key_flags =  	    STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID; -	il->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET; +	il->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx;  	il->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK;  	il->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index cbf2dc18341..5d4807c2b56 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4767,14 +4767,12 @@ il_bg_watchdog(unsigned long data)  		return;  	/* monitor and check for other stuck queues */ -	if (il_is_any_associated(il)) { -		for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { -			/* skip as we already checked the command queue */ -			if (cnt == il->cmd_queue) -				continue; -			if (il_check_stuck_queue(il, cnt)) -				return; -		} +	for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { +		/* skip as we already checked the command queue */ +		if (cnt == il->cmd_queue) +			continue; +		if (il_check_stuck_queue(il, cnt)) +			return;  	}  	mod_timer(&il->watchdog, diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index ce61b6fae1c..5c7fd185373 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -958,6 +958,7 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy,  	case NL80211_HIDDEN_SSID_ZERO_CONTENTS:  		/* firmware doesn't support this type of hidden SSID */  	default: +		kfree(bss_cfg);  		return -EINVAL;  	} diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index d357d1ed92f..74ecc33fdd9 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -436,8 +436,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue)  	case QID_RX:  		if (!rt2x00queue_full(queue))  			rt2x00queue_for_each_entry(queue, -						   Q_INDEX_DONE,  						   Q_INDEX, +						   Q_INDEX_DONE,  						   NULL,  						   rt2x00usb_kick_rx_entry);  		break; diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index efc4b7f308c..f3cfa0b9adf 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -1,5 +1,6 @@  /*   * Copyright 2010 ARM Ltd. + * Copyright 2012 Advanced Micro Devices, Inc., Robert Richter   *   * Perf-events backend for OProfile.   
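The oprofile_perf conversion that follows replaces a static struct perf_event **perf_events[NR_CPUS] table with DEFINE_PER_CPU, so storage scales with the CPUs actually possible rather than the compile-time maximum. A sketch of the idiom under assumed names (cpu_slots is made up):

#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/errno.h>

static DEFINE_PER_CPU(int *, cpu_slots);	/* was: int *cpu_slots[NR_CPUS] */

static int alloc_cpu_slots(int nslots)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(cpu_slots, cpu) = kcalloc(nslots, sizeof(int),
						  GFP_KERNEL);
		if (!per_cpu(cpu_slots, cpu))
			return -ENOMEM;	/* caller unwinds earlier CPUs */
	}
	return 0;
}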
*/ @@ -25,7 +26,7 @@ static int oprofile_perf_enabled;  static DEFINE_MUTEX(oprofile_perf_mutex);  static struct op_counter_config *counter_config; -static struct perf_event **perf_events[NR_CPUS]; +static DEFINE_PER_CPU(struct perf_event **, perf_events);  static int num_counters;  /* @@ -38,7 +39,7 @@ static void op_overflow_handler(struct perf_event *event,  	u32 cpu = smp_processor_id();  	for (id = 0; id < num_counters; ++id) -		if (perf_events[cpu][id] == event) +		if (per_cpu(perf_events, cpu)[id] == event)  			break;  	if (id != num_counters) @@ -74,7 +75,7 @@ static int op_create_counter(int cpu, int event)  {  	struct perf_event *pevent; -	if (!counter_config[event].enabled || perf_events[cpu][event]) +	if (!counter_config[event].enabled || per_cpu(perf_events, cpu)[event])  		return 0;  	pevent = perf_event_create_kernel_counter(&counter_config[event].attr, @@ -91,18 +92,18 @@ static int op_create_counter(int cpu, int event)  		return -EBUSY;  	} -	perf_events[cpu][event] = pevent; +	per_cpu(perf_events, cpu)[event] = pevent;  	return 0;  }  static void op_destroy_counter(int cpu, int event)  { -	struct perf_event *pevent = perf_events[cpu][event]; +	struct perf_event *pevent = per_cpu(perf_events, cpu)[event];  	if (pevent) {  		perf_event_release_kernel(pevent); -		perf_events[cpu][event] = NULL; +		per_cpu(perf_events, cpu)[event] = NULL;  	}  } @@ -257,12 +258,12 @@ void oprofile_perf_exit(void)  	for_each_possible_cpu(cpu) {  		for (id = 0; id < num_counters; ++id) { -			event = perf_events[cpu][id]; +			event = per_cpu(perf_events, cpu)[id];  			if (event)  				perf_event_release_kernel(event);  		} -		kfree(perf_events[cpu]); +		kfree(per_cpu(perf_events, cpu));  	}  	kfree(counter_config); @@ -277,8 +278,6 @@ int __init oprofile_perf_init(struct oprofile_operations *ops)  	if (ret)  		return ret; -	memset(&perf_events, 0, sizeof(perf_events)); -  	num_counters = perf_num_counters();  	if (num_counters <= 0) {  		pr_info("oprofile: no performance counters\n"); @@ -298,9 +297,9 @@ int __init oprofile_perf_init(struct oprofile_operations *ops)  	}  	for_each_possible_cpu(cpu) { -		perf_events[cpu] = kcalloc(num_counters, +		per_cpu(perf_events, cpu) = kcalloc(num_counters,  				sizeof(struct perf_event *), GFP_KERNEL); -		if (!perf_events[cpu]) { +		if (!per_cpu(perf_events, cpu)) {  			pr_info("oprofile: failed to allocate %d perf events "  					"for cpu %d\n", num_counters, cpu);  			ret = -ENOMEM; diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index dd6d93aa533..90c837f469a 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -474,7 +474,9 @@ static int __devinit imx_pinctrl_parse_groups(struct device_node *np,  		grp->configs[j] = config & ~IMX_PAD_SION;  	} +#ifdef DEBUG  	IMX_PMX_DUMP(info, grp->pins, grp->mux_mode, grp->configs, grp->npins); +#endif  	return 0;  } diff --git a/drivers/pinctrl/pinctrl-imx6q.c b/drivers/pinctrl/pinctrl-imx6q.c index 7737d4d71a3..e9bf71fbedc 100644 --- a/drivers/pinctrl/pinctrl-imx6q.c +++ b/drivers/pinctrl/pinctrl-imx6q.c @@ -1950,6 +1950,8 @@ static struct imx_pin_reg imx6q_pin_regs[] = {  	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 5, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__GPIO_1_12 */  	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 6, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__SJC_DONE */  	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 7, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3 */ +	IMX_PIN_REG(MX6Q_PAD_ENET_RX_ER, 0x04EC, 0x01D8, 0, 0x0000, 0), /* 
MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID */ +	IMX_PIN_REG(MX6Q_PAD_GPIO_1, 0x05F4, 0x0224, 3, 0x0000, 0), /* MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID */  };  /* Pad names for the pinmux subsystem */ diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 4f20f8dd3d7..17f6dfd8dbf 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -694,10 +694,10 @@ MODULE_DEVICE_TABLE(acpi, ideapad_device_ids);  static int __devinit ideapad_acpi_add(struct acpi_device *adevice)  {  	int ret, i; -	unsigned long cfg; +	int cfg;  	struct ideapad_private *priv; -	if (read_method_int(adevice->handle, "_CFG", (int *)&cfg)) +	if (read_method_int(adevice->handle, "_CFG", &cfg))  		return -ENODEV;  	priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -721,7 +721,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice)  		goto input_failed;  	for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) { -		if (test_bit(ideapad_rfk_data[i].cfgbit, &cfg)) +		if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg))  			ideapad_register_rfkill(adevice, i);  		else  			priv->rfk[i] = NULL; diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 0ffdb3cde2b..9af4257d490 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -72,6 +72,7 @@  #include <linux/string.h>  #include <linux/tick.h>  #include <linux/timer.h> +#include <linux/dmi.h>  #include <drm/i915_drm.h>  #include <asm/msr.h>  #include <asm/processor.h> @@ -1485,6 +1486,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {  MODULE_DEVICE_TABLE(pci, ips_id_table); +static int ips_blacklist_callback(const struct dmi_system_id *id) +{ +	pr_info("Blacklisted intel_ips for %s\n", id->ident); +	return 1; +} + +static const struct dmi_system_id ips_blacklist[] = { +	{ +		.callback = ips_blacklist_callback, +		.ident = "HP ProBook", +		.matches = { +			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), +			DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"), +		}, +	}, +	{ }	/* terminating entry */ +}; +  static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)  {  	u64 platform_info; @@ -1494,6 +1513,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)  	u16 htshi, trc, trc_required_mask;  	u8 tse; +	if (dmi_check_system(ips_blacklist)) +		return -ENODEV; +  	ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);  	if (!ips)  		return -ENOMEM; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 210d4ae547c..d456ff0c73b 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -973,7 +973,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,  			       struct device_attribute *attr,  			       const char *buffer, size_t count)  { -	unsigned long value = 0; +	int value;  	int ret = 0;  	struct sony_nc_value *item =  	    container_of(attr, struct sony_nc_value, devattr); @@ -984,7 +984,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,  	if (count > 31)  		return -EINVAL; -	if (kstrtoul(buffer, 10, &value)) +	if (kstrtoint(buffer, 10, &value))  		return -EINVAL;  	if (item->validate) @@ -994,7 +994,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,  		return value;  	ret = sony_nc_int_call(sony_nc_acpi_handle, *item->acpiset, -			(int *)&value, NULL); +			       &value, NULL);  	if (ret < 0)  		return -EIO; @@ -1010,6 +1010,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,  struct sony_backlight_props {  	
struct backlight_device *dev;  	int			handle; +	int			cmd_base;  	u8			offset;  	u8			maxlvl;  }; @@ -1037,7 +1038,7 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd)  	struct sony_backlight_props *sdev =  		(struct sony_backlight_props *)bl_get_data(bd); -	sony_call_snc_handle(sdev->handle, 0x0200, &result); +	sony_call_snc_handle(sdev->handle, sdev->cmd_base + 0x100, &result);  	return (result & 0xff) - sdev->offset;  } @@ -1049,7 +1050,8 @@ static int sony_nc_update_status_ng(struct backlight_device *bd)  		(struct sony_backlight_props *)bl_get_data(bd);  	value = bd->props.brightness + sdev->offset; -	if (sony_call_snc_handle(sdev->handle, 0x0100 | (value << 16), &result)) +	if (sony_call_snc_handle(sdev->handle, sdev->cmd_base | (value << 0x10), +				&result))  		return -EIO;  	return value; @@ -1172,6 +1174,11 @@ static int sony_nc_hotkeys_decode(u32 event, unsigned int handle)  /*   * ACPI callbacks   */ +enum event_types { +	HOTKEY = 1, +	KILLSWITCH, +	GFX_SWITCH +};  static void sony_nc_notify(struct acpi_device *device, u32 event)  {  	u32 real_ev = event; @@ -1196,7 +1203,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)  		/* hotkey event */  		case 0x0100:  		case 0x0127: -			ev_type = 1; +			ev_type = HOTKEY;  			real_ev = sony_nc_hotkeys_decode(event, handle);  			if (real_ev > 0) @@ -1216,7 +1223,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)  			 * update the rfkill device status when the  			 * switch is moved.  			 */ -			ev_type = 2; +			ev_type = KILLSWITCH;  			sony_call_snc_handle(handle, 0x0100, &result);  			real_ev = result & 0x03; @@ -1226,6 +1233,24 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)  			break; +		case 0x0128: +		case 0x0146: +			/* Hybrid GFX switching */ +			sony_call_snc_handle(handle, 0x0000, &result); +			dprintk("GFX switch event received (reason: %s)\n", +					(result & 0x01) ? 
+					"switch change" : "unknown"); + +			/* verify the switch state +			 * 1: discrete GFX +			 * 0: integrated GFX +			 */ +			sony_call_snc_handle(handle, 0x0100, &result); + +			ev_type = GFX_SWITCH; +			real_ev = result & 0xff; +			break; +  		default:  			dprintk("Unknown event 0x%x for handle 0x%x\n",  					event, handle); @@ -1238,7 +1263,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)  	} else {  		/* old style event */ -		ev_type = 1; +		ev_type = HOTKEY;  		sony_laptop_report_input_event(real_ev);  	} @@ -1893,32 +1918,33 @@ static ssize_t sony_nc_battery_care_limit_store(struct device *dev,  	 *  bits 4,5: store the limit into the EC  	 *  bits 6,7: store the limit into the battery  	 */ +	cmd = 0; -	/* -	 * handle 0x0115 should allow storing on battery too; -	 * handle 0x0136 same as 0x0115 + health status; -	 * handle 0x013f, same as 0x0136 but no storing on the battery -	 * -	 * Store only inside the EC for now, regardless the handle number -	 */ -	if (value == 0) -		/* disable limits */ -		cmd = 0x0; +	if (value > 0) { +		if (value <= 50) +			cmd = 0x20; -	else if (value <= 50) -		cmd = 0x21; +		else if (value <= 80) +			cmd = 0x10; -	else if (value <= 80) -		cmd = 0x11; +		else if (value <= 100) +			cmd = 0x30; -	else if (value <= 100) -		cmd = 0x31; +		else +			return -EINVAL; -	else -		return -EINVAL; +		/* +		 * handle 0x0115 should allow storing on battery too; +		 * handle 0x0136 same as 0x0115 + health status; +		 * handle 0x013f, same as 0x0136 but no storing on the battery +		 */ +		if (bcare_ctl->handle != 0x013f) +			cmd = cmd | (cmd << 2); -	if (sony_call_snc_handle(bcare_ctl->handle, (cmd << 0x10) | 0x0100, -				&result)) +		cmd = (cmd | 0x1) << 0x10; +	} + +	if (sony_call_snc_handle(bcare_ctl->handle, cmd | 0x0100, &result))  		return -EIO;  	return count; @@ -2113,7 +2139,7 @@ static ssize_t sony_nc_thermal_mode_show(struct device *dev,  		struct device_attribute *attr, char *buffer)  {  	ssize_t count = 0; -	unsigned int mode = sony_nc_thermal_mode_get(); +	int mode = sony_nc_thermal_mode_get();  	if (mode < 0)  		return mode; @@ -2472,6 +2498,7 @@ static void sony_nc_backlight_ng_read_limits(int handle,  {  	u64 offset;  	int i; +	int lvl_table_len = 0;  	u8 min = 0xff, max = 0x00;  	unsigned char buffer[32] = { 0 }; @@ -2480,8 +2507,6 @@ static void sony_nc_backlight_ng_read_limits(int handle,  	props->maxlvl = 0xff;  	offset = sony_find_snc_handle(handle); -	if (offset < 0) -		return;  	/* try to read the boundaries from ACPI tables, if we fail the above  	 * defaults should be reasonable @@ -2491,11 +2516,21 @@ static void sony_nc_backlight_ng_read_limits(int handle,  	if (i < 0)  		return; +	switch (handle) { +	case 0x012f: +	case 0x0137: +		lvl_table_len = 9; +		break; +	case 0x143: +		lvl_table_len = 16; +		break; +	} +  	/* the buffer lists brightness levels available, brightness levels are  	 * from position 0 to 8 in the array, other values are used by ALS  	 * control.  	 
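The battery-care hunk above packs the limit into nibbles: bits 0-3 select the threshold, bits 4-5 mirror it for EC storage, bits 6-7 for the battery, and the finished command gains an enable bit before being shifted by 16. A sketch of that encoding, with thresholds taken from the hunk (encode_care_limit() itself is invented):

#include <linux/errno.h>
#include <linux/types.h>

/* Invented helper; thresholds and bit layout mirror the hunk above. */
static int encode_care_limit(unsigned int value, bool store_on_battery)
{
	unsigned int cmd;

	if (value == 0)
		return 0;		/* disable limits entirely */
	else if (value <= 50)
		cmd = 0x20;
	else if (value <= 80)
		cmd = 0x10;
	else if (value <= 100)
		cmd = 0x30;
	else
		return -EINVAL;

	if (store_on_battery)
		cmd |= cmd << 2;	/* duplicate EC bits into battery bits */

	return (cmd | 0x1) << 0x10;	/* add enable bit, move past arg byte */
}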
*/ -	for (i = 0; i < 9 && i < ARRAY_SIZE(buffer); i++) { +	for (i = 0; i < lvl_table_len && i < ARRAY_SIZE(buffer); i++) {  		dprintk("Brightness level: %d\n", buffer[i]); @@ -2520,16 +2555,24 @@ static void sony_nc_backlight_setup(void)  	const struct backlight_ops *ops = NULL;  	struct backlight_properties props; -	if (sony_find_snc_handle(0x12f) != -1) { +	if (sony_find_snc_handle(0x12f) >= 0) {  		ops = &sony_backlight_ng_ops; +		sony_bl_props.cmd_base = 0x0100;  		sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props);  		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; -	} else if (sony_find_snc_handle(0x137) != -1) { +	} else if (sony_find_snc_handle(0x137) >= 0) {  		ops = &sony_backlight_ng_ops; +		sony_bl_props.cmd_base = 0x0100;  		sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props);  		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; +	} else if (sony_find_snc_handle(0x143) >= 0) { +		ops = &sony_backlight_ng_ops; +		sony_bl_props.cmd_base = 0x3000; +		sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props); +		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; +  	} else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT",  						&unused))) {  		ops = &sony_backlight_ops; @@ -2597,6 +2640,12 @@ static int sony_nc_add(struct acpi_device *device)  		}  	} +	result = sony_laptop_setup_input(device); +	if (result) { +		pr_err("Unable to create input devices\n"); +		goto outplatform; +	} +  	if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "ECON",  					 &handle))) {  		int arg = 1; @@ -2614,12 +2663,6 @@ static int sony_nc_add(struct acpi_device *device)  	}  	/* setup input devices and helper fifo */ -	result = sony_laptop_setup_input(device); -	if (result) { -		pr_err("Unable to create input devices\n"); -		goto outsnc; -	} -  	if (acpi_video_backlight_support()) {  		pr_info("brightness ignored, must be controlled by ACPI video driver\n");  	} else { @@ -2667,22 +2710,21 @@ static int sony_nc_add(struct acpi_device *device)  	return 0; -      out_sysfs: +out_sysfs:  	for (item = sony_nc_values; item->name; ++item) {  		device_remove_file(&sony_pf_device->dev, &item->devattr);  	}  	sony_nc_backlight_cleanup(); - -	sony_laptop_remove_input(); - -      outsnc:  	sony_nc_function_cleanup(sony_pf_device);  	sony_nc_handles_cleanup(sony_pf_device); -      outpresent: +outplatform: +	sony_laptop_remove_input(); + +outpresent:  	sony_pf_remove(); -      outwalk: +outwalk:  	sony_nc_rfkill_cleanup();  	return result;  } diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 39d3aa41add..f56c8ba3a86 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -1085,7 +1085,7 @@ static int __init rpmsg_init(void)  	return ret;  } -module_init(rpmsg_init); +subsys_initcall(rpmsg_init);  static void __exit rpmsg_fini(void)  { diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index ae781487461..07273453887 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -22,11 +22,6 @@ static int __init wait_scan_init(void)  	 * and might not yet have reached the scsi async scanning  	 */  	wait_for_device_probe(); -	/* -	 * and then we wait for the actual asynchronous scsi scan -	 * to finish. 
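The rpmsg hunk above moves bus registration from module_init() to subsys_initcall(), so the virtio rpmsg bus exists before any built-in driver's module_init() runs; initcall levels order built-in initialization at boot. A minimal sketch of the idiom (mybus_init() is illustrative):

#include <linux/init.h>

/* Runs at the subsys initcall level, i.e. before module_init() code
 * when built in, so dependent drivers can already find the bus. */
static int __init mybus_init(void)
{
	/* bus_register(&mybus_type) would go here */
	return 0;
}
subsys_initcall(mybus_init);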
-	 */ -	scsi_complete_async_scans();  	return 0;  } diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 9888693a18f..664f6e775d0 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1095,7 +1095,7 @@ int target_emulate_write_same(struct se_cmd *cmd)  	if (num_blocks != 0)  		range = num_blocks;  	else -		range = (dev->transport->get_blocks(dev) - lba); +		range = (dev->transport->get_blocks(dev) - lba) + 1;  	pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n",  		 (unsigned long long)lba, (unsigned long long)range); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 85564998500..a1bcd927a9e 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2031,7 +2031,7 @@ static int __core_scsi3_write_aptpl_to_file(  	if (IS_ERR(file) || !file || !file->f_dentry) {  		pr_err("filp_open(%s) for APTPL metadata"  			" failed\n", path); -		return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT); +		return IS_ERR(file) ? PTR_ERR(file) : -ENOENT;  	}  	iov[0].iov_base = &buf[0]; @@ -3818,7 +3818,7 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd)  			" SPC-2 reservation is held, returning"  			" RESERVATION_CONFLICT\n");  		cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT; -		ret = EINVAL; +		ret = -EINVAL;  		goto out;  	} @@ -3828,7 +3828,8 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd)  	 */  	if (!cmd->se_sess) {  		cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -		return -EINVAL; +		ret = -EINVAL; +		goto out;  	}  	if (cmd->data_length < 24) { diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index f03fb9730f5..5b65f33939a 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -230,6 +230,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd)  {  	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); +	if (cmd->aborted) +		return ~0;  	return fc_seq_exch(cmd->seq)->rxid;  } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5b400730c21..4ee522b3f66 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,7 +86,31 @@ static struct {  #endif /* CONFIG_CIFS_WEAK_PW_HASH */  #endif /* CIFS_POSIX */ -/* Forward declarations */ +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ +static DEFINE_MUTEX(cifs_kmap_mutex); + +static inline void +cifs_kmap_lock(void) +{ +	mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ +	mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while(0) +#define cifs_kmap_unlock() do { ; } while(0) +#endif /* CONFIG_HIGHMEM */  /* Mark as invalid, all open files on tree connections since they     were closed when session to server was lost */ @@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)  	}  	/* marshal up the page array */ +	cifs_kmap_lock();  	len = rdata->marshal_iov(rdata, data_len); +	cifs_kmap_unlock();  	data_len -= len;  	/* issue the read if we have any iovecs left to fill */ @@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)  	 * and set the iov_len properly for each one. 
It may also set  	 * wdata->bytes too.  	 */ +	cifs_kmap_lock();  	wdata->marshal_iov(iov, wdata); +	cifs_kmap_unlock();  	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0ae86ddf221..94b7788c318 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,  #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)  #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT	(LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT	(1<<24) +#endif /* CONFIG_HIGHMEM */ +  static unsigned int  cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)  { @@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)  		wsize = min_t(unsigned int, wsize,  				server->maxBuf - sizeof(WRITE_REQ) + 4); +	/* limit to the amount that we can kmap at once */ +	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); +  	/* hard limit of CIFS_MAX_WSIZE */  	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)  	if (!(server->capabilities & CAP_LARGE_READ_X))  		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); +	/* limit to the amount that we can kmap at once */ +	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); +  	/* hard limit of CIFS_MAX_RSIZE */  	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0a8224d1c4c..a4217f02fab 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,  	dentry = d_lookup(parent, name);  	if (dentry) { -		/* FIXME: check for inode number changes? 
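Both cifs hunks above attack HIGHMEM kmap exhaustion the same way: marshalling of a page array is serialized behind one mutex, and rsize/wsize are capped at LAST_PKMAP * PAGE_CACHE_SIZE so that a single request can always be mapped in full. A sketch of the locking half, with invented names but the same structure as the hunk:

#include <linux/mutex.h>

#ifdef CONFIG_HIGHMEM
/* Serialize bursts of kmap() so concurrent writers cannot each pin
 * half a page array and deadlock waiting for the remaining slots. */
static DEFINE_MUTEX(kmap_burst_mutex);
#define kmap_burst_lock()	mutex_lock(&kmap_burst_mutex)
#define kmap_burst_unlock()	mutex_unlock(&kmap_burst_mutex)
#else
#define kmap_burst_lock()	do { } while (0)
#define kmap_burst_unlock()	do { } while (0)
#endif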
*/ -		if (dentry->d_inode != NULL) +		inode = dentry->d_inode; +		/* update inode in place if i_ino didn't change */ +		if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { +			cifs_fattr_to_inode(inode, fattr);  			return dentry; +		}  		d_drop(dentry);  		dput(dentry);  	} diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3097ee58fd7..f25d4ea14be 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,  	if (mid == NULL)  		return -ENOMEM; -	/* put it on the pending_mid_q */ -	spin_lock(&GlobalMid_Lock); -	list_add_tail(&mid->qhead, &server->pending_mid_q); -	spin_unlock(&GlobalMid_Lock); -  	rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); -	if (rc) -		delete_mid(mid); +	if (rc) { +		DeleteMidQEntry(mid); +		return rc; +	} +  	*ret_mid = mid; -	return rc; +	return 0;  }  /* @@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,  	mid->callback_data = cbdata;  	mid->mid_state = MID_REQUEST_SUBMITTED; +	/* put it on the pending_mid_q */ +	spin_lock(&GlobalMid_Lock); +	list_add_tail(&mid->qhead, &server->pending_mid_q); +	spin_unlock(&GlobalMid_Lock); + +  	cifs_in_send_inc(server);  	rc = smb_sendv(server, iov, nvec);  	cifs_in_send_dec(server);  	cifs_save_when_sent(mid);  	mutex_unlock(&server->srv_mutex); -	if (rc) -		goto out_err; +	if (rc == 0) +		return 0; -	return rc; -out_err:  	delete_mid(mid);  	add_credits(server, 1);  	wake_up(&server->request_q); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 74598f67efe..1c8b5567080 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,  		goto error_tgt_fput;  	/* Check if EPOLLWAKEUP is allowed */ -	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) +	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))  		epds.events &= ~EPOLLWAKEUP;  	/* diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 49cf230554a..24a49d47e93 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)  out:  	ios->numdevs = devs_in_group;  	ios->pages_consumed = cur_pg; -	if (unlikely(ret)) { -		if (length == ios->length) -			return ret; -		else -			ios->length -= length; -	} -	return 0; +	return ret;  }  int ore_create(struct ore_io_state *ios) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index d222c77cfa1..5f376d14fdc 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,  {  	unsigned data_devs = sp2d->data_devs;  	unsigned group_width = data_devs + sp2d->parity; -	unsigned p; +	int p, c;  	if (!sp2d->needed)  		return; -	for (p = 0; p < sp2d->pages_in_unit; p++) { -		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - -		if (_1ps->write_count < group_width) { -			unsigned c; +	for (c = data_devs - 1; c >= 0; --c) +		for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { +			struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; -			for (c = 0; c < data_devs; c++) -				if (_1ps->page_is_read[c]) { -					struct page *page = _1ps->pages[c]; +			if (_1ps->page_is_read[c]) { +				struct page *page = _1ps->pages[c]; -					r4w->put_page(priv, page); -					_1ps->page_is_read[c] = false; -				} +				r4w->put_page(priv, page); +				_1ps->page_is_read[c] = false; +			}  		} +	for (p = 0; p < sp2d->pages_in_unit; p++) { +		struct __1_page_stripe *_1ps = 
&sp2d->_1p_stripes[p]; +  		memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));  		_1ps->write_count = 0;  		_1ps->tx = NULL; @@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)   * ios->sp2d[p][*], xor is calculated the same way. These pages are   * allocated/freed and don't go through cache   */ -static int _read_4_write(struct ore_io_state *ios) +static int _read_4_write_first_stripe(struct ore_io_state *ios)  { -	struct ore_io_state *ios_read;  	struct ore_striping_info read_si;  	struct __stripe_pages_2d *sp2d = ios->sp2d;  	u64 offset = ios->si.first_stripe_start; -	u64 last_stripe_end; -	unsigned bytes_in_stripe = ios->si.bytes_in_stripe; -	unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; -	int ret; +	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;  	if (offset == ios->offset) /* Go to start collect $200 */  		goto read_last_stripe; @@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)  	min_p = _sp2d_min_pg(sp2d);  	max_p = _sp2d_max_pg(sp2d); +	ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", +		   offset, ios->offset, min_p, max_p); +  	for (c = 0; ; c++) {  		ore_calc_stripe_info(ios->layout, offset, 0, &read_si);  		read_si.obj_offset += min_p * PAGE_SIZE; @@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)  	}  read_last_stripe: +	return 0; +} + +static int _read_4_write_last_stripe(struct ore_io_state *ios) +{ +	struct ore_striping_info read_si; +	struct __stripe_pages_2d *sp2d = ios->sp2d; +	u64 offset; +	u64 last_stripe_end; +	unsigned bytes_in_stripe = ios->si.bytes_in_stripe; +	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; +  	offset = ios->offset + ios->length;  	if (offset % PAGE_SIZE)  		_add_to_r4w_last_page(ios, &offset); @@ -527,15 +538,15 @@ read_last_stripe:  	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,  		       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); -	BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); -	/* unaligned IO must be within a single stripe */ -  	if (min_p == sp2d->pages_in_unit) {  		/* Didn't do it yet */  		min_p = _sp2d_min_pg(sp2d);  		max_p = _sp2d_max_pg(sp2d);  	} +	ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", +		   offset, last_stripe_end, min_p, max_p); +  	while (offset < last_stripe_end) {  		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; @@ -568,6 +579,15 @@ read_last_stripe:  	}  read_it: +	return 0; +} + +static int _read_4_write_execute(struct ore_io_state *ios) +{ +	struct ore_io_state *ios_read; +	unsigned i; +	int ret; +  	ios_read = ios->ios_read_4_write;  	if (!ios_read)  		return 0; @@ -591,6 +611,8 @@ read_it:  	}  	_mark_read4write_pages_uptodate(ios_read, ret); +	ore_put_io_state(ios_read); +	ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */  	return 0;  } @@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,  			/* If first stripe, Read in all read4write pages  			 * (if needed) before we calculate the first parity.  			 
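The ore_raid refactor above splits the old monolithic _read_4_write() into three steps that the parity path can invoke independently per stripe. Schematically, under assumed names and with the driver glue omitted (prototypes only, not a buildable unit):

#include <linux/types.h>

struct ore_io_state;

static int r4w_first_stripe(struct ore_io_state *ios);	/* pre-reads below offset */
static int r4w_last_stripe(struct ore_io_state *ios);	/* pre-reads past the end */
static int r4w_execute(struct ore_io_state *ios);	/* issue, then release ios_read */

static int add_parity_unit(struct ore_io_state *ios, bool first, bool last)
{
	if (first)
		r4w_first_stripe(ios);
	if (last)
		r4w_last_stripe(ios);
	return r4w_execute(ios);	/* re-armed for the next stripe */
}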
*/ -			_read_4_write(ios); +			_read_4_write_first_stripe(ios);  		} +		if (!cur_len) /* If last stripe r4w pages of last stripe */ +			_read_4_write_last_stripe(ios); +		_read_4_write_execute(ios);  		for (i = 0; i < num_pages; i++) {  			pages[i] = _raid_page_alloc(); @@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,  int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)  { -	struct ore_layout *layout = ios->layout; -  	if (ios->parity_pages) { +		struct ore_layout *layout = ios->layout;  		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; -		unsigned stripe_size = ios->si.bytes_in_stripe; -		u64 last_stripe, first_stripe;  		if (_sp2d_alloc(pages_in_unit, layout->group_width,  				layout->parity, &ios->sp2d)) {  			return -ENOMEM;  		} - -		/* Round io down to last full strip */ -		first_stripe = div_u64(ios->offset, stripe_size); -		last_stripe = div_u64(ios->offset + ios->length, stripe_size); - -		/* If an IO spans more then a single stripe it must end at -		 * a stripe boundary. The reminder at the end is pushed into the -		 * next IO. -		 */ -		if (last_stripe != first_stripe) { -			ios->length = last_stripe * stripe_size - ios->offset; - -			BUG_ON(!ios->length); -			ios->nr_pages = (ios->length + PAGE_SIZE - 1) / -					PAGE_SIZE; -			ios->si.length = ios->length; /*make it consistent */ -		}  	}  	return 0;  } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac3f36..6ec6f9ee2fe 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -268,7 +268,6 @@ group_extend_out:  		err = ext4_move_extents(filp, donor_filp, me.orig_start,  					me.donor_start, me.len, &me.moved_len);  		mnt_drop_write_file(filp); -		mnt_drop_write(filp->f_path.mnt);  		if (copy_to_user((struct move_extent __user *)arg,  				 &me, sizeof(me))) diff --git a/fs/fifo.c b/fs/fifo.c index b1a524d798e..cf6f4345ceb 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -14,7 +14,7 @@  #include <linux/sched.h>  #include <linux/pipe_fs_i.h> -static void wait_for_partner(struct inode* inode, unsigned int *cnt) +static int wait_for_partner(struct inode* inode, unsigned int *cnt)  {  	int cur = *cnt;	 @@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)  		if (signal_pending(current))  			break;  	} +	return cur == *cnt ? 
-ERESTARTSYS : 0;  }  static void wake_up_partner(struct inode* inode) @@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)  				 * seen a writer */  				filp->f_version = pipe->w_counter;  			} else { -				wait_for_partner(inode, &pipe->w_counter); -				if(signal_pending(current)) +				if (wait_for_partner(inode, &pipe->w_counter))  					goto err_rd;  			}  		} @@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)  			wake_up_partner(inode);  		if (!pipe->readers) { -			wait_for_partner(inode, &pipe->r_counter); -			if (signal_pending(current)) +			if (wait_for_partner(inode, &pipe->r_counter))  				goto err_wr;  		}  		break; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b47277baeba..f50d3e8d6f2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)  	objios->ios->done = _read_done;  	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,  		rdata->args.offset, rdata->args.count); -	return ore_read(objios->ios); +	ret = ore_read(objios->ios); +	if (unlikely(ret)) +		objio_free_result(&objios->oir); +	return ret;  }  /* @@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)  	struct nfs_write_data *wdata = objios->oir.rpcdata;  	struct address_space *mapping = wdata->header->inode->i_mapping;  	pgoff_t index = offset / PAGE_SIZE; -	struct page *page = find_get_page(mapping, index); +	struct page *page; +	loff_t i_size = i_size_read(wdata->header->inode); + +	if (offset >= i_size) { +		*uptodate = true; +		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); +		return ZERO_PAGE(0); +	} +	page = find_get_page(mapping, index);  	if (!page) {  		page = find_or_create_page(mapping, index, GFP_NOFS);  		if (unlikely(!page)) { @@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)  static void __r4w_put_page(void *priv, struct page *page)  { -	dprintk("%s: index=0x%lx\n", __func__, page->index); -	page_cache_release(page); +	dprintk("%s: index=0x%lx\n", __func__, +		(page == ZERO_PAGE(0)) ? -1UL : page->index); +	if (ZERO_PAGE(0) != page) +		page_cache_release(page);  	return;  } @@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)  	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,  		wdata->args.offset, wdata->args.count);  	ret = ore_write(objios->ios); -	if (unlikely(ret)) +	if (unlikely(ret)) { +		objio_free_result(&objios->oir);  		return ret; +	}  	if (objios->sync)  		_write_done(objios->ios, objios); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index ef3d1ba6d99..15e2fc5aa60 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)  		lnum = ubifs_next_log_lnum(c, lnum);  	} -	/* Fixup the current log head */ -	err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); +	/* +	 * Fixup the log head which contains the only a CS node at the +	 * beginning. +	 */ +	err = fixup_leb(c, c->lhead_lnum, +			ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));  	if (err)  		goto out; diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9d1aeb7e273..4f33c32affe 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1074,13 +1074,13 @@ restart:  	 * If we couldn't get anything, give up.  	 
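The fifo change earlier in this run makes wait_for_partner() report the outcome itself, so callers stop re-testing signal_pending() after the fact. The shape of that pattern, with an illustrative wait queue standing in for the pipe machinery:

#include <linux/errno.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(partner_wq);	/* illustrative queue */

/* Return -ERESTARTSYS if a signal cut the wait short, 0 on success. */
static int wait_for_counter_change(unsigned int *cnt)
{
	unsigned int cur = *cnt;

	if (wait_event_interruptible(partner_wq, *cnt != cur))
		return -ERESTARTSYS;
	return 0;
}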
*/  	if (bno_cur_lt == NULL && bno_cur_gt == NULL) { +		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); +  		if (!forced++) {  			trace_xfs_alloc_near_busy(args);  			xfs_log_force(args->mp, XFS_LOG_SYNC);  			goto restart;  		} - -		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);  		trace_xfs_alloc_size_neither(args);  		args->agbno = NULLAGBLOCK;  		return 0; @@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(  	current_restore_flags_nested(&pflags, PF_FSTRANS);  } - -int				/* error */ +/* + * Data allocation requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. Metadata + * requests, OTOH, are generally from low stack usage paths, so avoid the + * context switch overhead here. + */ +int  xfs_alloc_vextent( -	xfs_alloc_arg_t	*args)	/* allocation argument structure */ +	struct xfs_alloc_arg	*args)  {  	DECLARE_COMPLETION_ONSTACK(done); +	if (!args->userdata) +		return __xfs_alloc_vextent(args); + +  	args->done = &done;  	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);  	queue_work(xfs_alloc_wq, &args->work); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4beb421018..269b35c084d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(  		(__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);  } -int -xfs_bwrite( -	struct xfs_buf		*bp) -{ -	int			error; - -	ASSERT(xfs_buf_islocked(bp)); - -	bp->b_flags |= XBF_WRITE; -	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); - -	xfs_bdstrat_cb(bp); - -	error = xfs_buf_iowait(bp); -	if (error) { -		xfs_force_shutdown(bp->b_target->bt_mount, -				   SHUTDOWN_META_IO_ERROR); -	} -	return error; -} -  /*   * Called when we want to stop a buffer from getting written or read.   * We attach the EIO error, muck with its flags, and call xfs_buf_ioend @@ -1079,14 +1058,7 @@ xfs_bioerror_relse(  	return EIO;  } - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int +STATIC int  xfs_bdstrat_cb(  	struct xfs_buf	*bp)  { @@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(  	return 0;  } +int +xfs_bwrite( +	struct xfs_buf		*bp) +{ +	int			error; + +	ASSERT(xfs_buf_islocked(bp)); + +	bp->b_flags |= XBF_WRITE; +	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + +	xfs_bdstrat_cb(bp); + +	error = xfs_buf_iowait(bp); +	if (error) { +		xfs_force_shutdown(bp->b_target->bt_mount, +				   SHUTDOWN_META_IO_ERROR); +	} +	return error; +} +  /*   * Wrapper around bdstrat so that we can stop data from going to disk in case   * we are shutting down the filesystem.  
Typically user data goes thru this @@ -1243,7 +1236,7 @@ xfs_buf_iorequest(  	 */  	atomic_set(&bp->b_io_remaining, 1);  	_xfs_buf_ioapply(bp); -	_xfs_buf_ioend(bp, 0); +	_xfs_buf_ioend(bp, 1);  	xfs_buf_rele(bp);  } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7f1d1392ce3..79344c48008 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);  extern int xfs_bwrite(struct xfs_buf *bp);  extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *);  extern void xfs_buf_ioend(xfs_buf_t *,	int);  extern void xfs_buf_ioerror(xfs_buf_t *, int); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 45df2b857d4..d9e451115f9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(  		if (!XFS_BUF_ISSTALE(bp)) {  			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; -			xfs_bdstrat_cb(bp); +			xfs_buf_iorequest(bp);  		} else {  			xfs_buf_relse(bp);  		} diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h index c544356b374..294b1e755ab 100644 --- a/include/asm-generic/dma-contiguous.h +++ b/include/asm-generic/dma-contiguous.h @@ -18,7 +18,7 @@ static inline void dev_set_cma_area(struct device *dev, struct cma *cma)  {  	if (dev)  		dev->cma_area = cma; -	if (!dev || !dma_contiguous_default_area) +	if (!dev && !dma_contiguous_default_area)  		dma_contiguous_default_area = cma;  } diff --git a/include/linux/capability.h b/include/linux/capability.h index 68d56effc32..d10b7ed595b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,11 +360,11 @@ struct cpu_vfs_cap_data {  #define CAP_WAKE_ALARM            35 -/* Allow preventing system suspends while epoll events are pending */ +/* Allow preventing system suspends */ -#define CAP_EPOLLWAKEUP      36 +#define CAP_BLOCK_SUSPEND    36 -#define CAP_LAST_CAP         CAP_EPOLLWAKEUP +#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND  #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d..44c87e731e9 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection {  	/* connection negotiation temps */  	char in_banner[CEPH_BANNER_MAX_LEN]; -	union { -		struct {  /* outgoing connection */ -			struct ceph_msg_connect out_connect; -			struct ceph_msg_connect_reply in_reply; -		}; -		struct {  /* incoming */ -			struct ceph_msg_connect in_connect; -			struct ceph_msg_connect_reply out_reply; -		}; -	}; +	struct ceph_msg_connect out_connect; +	struct ceph_msg_connect_reply in_reply;  	struct ceph_entity_addr actual_peer_addr;  	/* message out temps */ diff --git a/include/linux/device.h b/include/linux/device.h index 161d96241b1..6de94151ff6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev);  extern struct device *get_device(struct device *dev);  extern void put_device(struct device *dev); -extern void wait_for_device_probe(void); -  #ifdef CONFIG_DEVTMPFS  extern int devtmpfs_create_node(struct device *dev);  extern int devtmpfs_delete_node(struct device *dev); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6f8be328770..f4bb378ccf6 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -34,7 +34,7 @@   * re-allowed until epoll_wait is called 
again after consuming the wakeup   * event(s).   * - * Requires CAP_EPOLLWAKEUP + * Requires CAP_BLOCK_SUSPEND   */  #define EPOLLWAKEUP (1 << 29) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 176a939d154..af961d6f7ab 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -65,7 +65,7 @@ struct trace_iterator {  	void			*private;  	int			cpu_file;  	struct mutex		mutex; -	struct ring_buffer_iter	*buffer_iter[NR_CPUS]; +	struct ring_buffer_iter	**buffer_iter;  	unsigned long		iter_flags;  	/* trace_seq for __print_flags() and __print_symbolic() etc. */ @@ -207,6 +207,9 @@ struct ftrace_event_call {  	 *   bit 1:		enabled  	 *   bit 2:		filter_active  	 *   bit 3:		enabled cmd record +	 *   bit 4:		allow trace by non root (cap any) +	 *   bit 5:		failed to apply filter +	 *   bit 6:		ftrace internal event (do not enable)  	 *  	 * Changes to flags must hold the event_mutex.  	 * diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e65eff6af3..8a747618699 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -168,8 +168,8 @@ extern struct cred init_cred;  	.children	= LIST_HEAD_INIT(tsk.children),			\  	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\  	.group_leader	= &tsk,						\ -	RCU_INIT_POINTER(.real_cred, &init_cred),			\ -	RCU_INIT_POINTER(.cred, &init_cred),				\ +	RCU_POINTER_INITIALIZER(real_cred, &init_cred),			\ +	RCU_POINTER_INITIALIZER(cred, &init_cred),			\  	.comm		= INIT_TASK_COMM,				\  	.thread		= INIT_THREAD,					\  	.fs		= &init_fs,					\ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e6ca56de993..78e2ada50cd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -308,6 +308,8 @@ enum {  struct intel_iommu {  	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */ +	u64 		reg_phys; /* physical address of hw register set */ +	u64		reg_size; /* size of hw register set */  	u64		cap;  	u64		ecap;  	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ diff --git a/include/linux/irq.h b/include/linux/irq.h index a5261e3d2e3..553fb66da13 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -150,9 +150,7 @@ struct irq_data {  	void			*handler_data;  	void			*chip_data;  	struct msi_desc		*msi_desc; -#ifdef CONFIG_SMP  	cpumask_var_t		affinity; -#endif  };  /* diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index c513a40510f..0976fc46d1e 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -42,8 +42,7 @@   * allowed.   *   * Not initializing the key (static data is initialized to 0s anyway) is the - * same as using STATIC_KEY_INIT_FALSE and static_key_false() is - * equivalent with static_branch(). + * same as using STATIC_KEY_INIT_FALSE.   *  */ @@ -107,12 +106,6 @@ static __always_inline bool static_key_true(struct static_key *key)  	return !static_key_false(key);  } -/* Deprecated. Please use 'static_key_false() instead. */ -static __always_inline bool static_branch(struct static_key *key) -{ -	return arch_static_branch(key); -} -  extern struct jump_entry __start___jump_table[];  extern struct jump_entry __stop___jump_table[]; @@ -166,14 +159,6 @@ static __always_inline bool static_key_true(struct static_key *key)  	return false;  } -/* Deprecated. Please use 'static_key_false() instead. 
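
The jump_label hunks here remove the deprecated static_branch() in favor of static_key_false(); the tracepoint.h hunk further down shows the mechanical conversion. A minimal sketch of what migrated code looks like; the key name and the helpers around it are hypothetical, only the static_key API itself is real:

	static struct static_key my_key = STATIC_KEY_INIT_FALSE;	/* hypothetical key */

	void hot_path(void)
	{
		if (static_key_false(&my_key))		/* was: static_branch(&my_key) */
			do_unlikely_work();		/* hypothetical slow path */
	}

	void enable_feature(void)			/* sleepable context only */
	{
		static_key_slow_inc(&my_key);
	}
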
*/ -static __always_inline bool static_branch(struct static_key *key) -{ -	if (unlikely(atomic_read(&key->enabled)) > 0) -		return true; -	return false; -} -  static inline void static_key_slow_inc(struct static_key *key)  {  	atomic_inc(&key->enabled); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e07f5e0c5df..604382143bc 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,7 +377,6 @@ extern enum system_states {  	SYSTEM_HALT,  	SYSTEM_POWER_OFF,  	SYSTEM_RESTART, -	SYSTEM_SUSPEND_DISK,  } system_state;  #define TAINT_PROPRIETARY_MODULE	0 diff --git a/include/linux/key.h b/include/linux/key.h index 4cd22ed627e..cef3b315ba7 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)  				   rwsem_is_locked(&((struct key *)(KEY))->sem)))  #define rcu_assign_keypointer(KEY, PAYLOAD)				\ -	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) +do {									\ +	rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD));		\ +} while (0)  #ifdef CONFIG_SYSCTL  extern ctl_table key_sysctls[]; diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index d6bd50110ec..2e7a1e032c7 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -55,12 +55,17 @@ struct kmsg_dumper {  #ifdef CONFIG_PRINTK  void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +			       char *line, size_t size, size_t *len); +  bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,  			char *line, size_t size, size_t *len);  bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,  			  char *buf, size_t size, size_t *len); +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); +  void kmsg_dump_rewind(struct kmsg_dumper *dumper);  int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)  {  } +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, +					     bool syslog, const char *line, +					     size_t size, size_t *len) +{ +	return false; +} +  static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,  				const char *line, size_t size, size_t *len)  { @@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,  	return false;  } +static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +} +  static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)  {  } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ab741b0d007..5f187026b81 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2755,6 +2755,17 @@  #define PCI_DEVICE_ID_INTEL_IOAT_SNB7	0x3c27  #define PCI_DEVICE_ID_INTEL_IOAT_SNB8	0x3c2e  #define PCI_DEVICE_ID_INTEL_IOAT_SNB9	0x3c2f +#define PCI_DEVICE_ID_INTEL_UNC_HA	0x3c46 +#define PCI_DEVICE_ID_INTEL_UNC_IMC0	0x3cb0 +#define PCI_DEVICE_ID_INTEL_UNC_IMC1	0x3cb1 +#define PCI_DEVICE_ID_INTEL_UNC_IMC2	0x3cb4 +#define PCI_DEVICE_ID_INTEL_UNC_IMC3	0x3cb5 +#define PCI_DEVICE_ID_INTEL_UNC_QPI0	0x3c41 +#define PCI_DEVICE_ID_INTEL_UNC_QPI1	0x3c42 +#define PCI_DEVICE_ID_INTEL_UNC_R2PCIE	0x3c43 +#define PCI_DEVICE_ID_INTEL_UNC_R3QPI0	0x3c44 +#define PCI_DEVICE_ID_INTEL_UNC_R3QPI1	0x3c45 +#define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0  #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f  #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0  #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5 diff --git 
a/include/linux/perf_event.h b/include/linux/perf_event.h index 45db49f64bb..76c5c8b724a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -677,6 +677,7 @@ struct hw_perf_event {  			u64		last_tag;  			unsigned long	config_base;  			unsigned long	event_base; +			int		event_base_rdpmc;  			int		idx;  			int		last_cpu; @@ -1106,6 +1107,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,  				struct task_struct *task,  				perf_overflow_handler_t callback,  				void *context); +extern void perf_pmu_migrate_context(struct pmu *pmu, +				int src_cpu, int dst_cpu);  extern u64 perf_event_read_value(struct perf_event *event,  				 u64 *enabled, u64 *running); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9cac722b169..115ead2b515 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -147,6 +147,7 @@ extern void synchronize_sched(void);  extern void __rcu_read_lock(void);  extern void __rcu_read_unlock(void); +extern void rcu_read_unlock_special(struct task_struct *t);  void synchronize_rcu(void);  /* @@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)  }  #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) +extern int rcu_is_cpu_idle(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ +  #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)  bool rcu_lockdep_current_cpu_online(void);  #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)  #ifdef CONFIG_DEBUG_LOCK_ALLOC -#ifdef CONFIG_PROVE_RCU -extern int rcu_is_cpu_idle(void); -#else /* !CONFIG_PROVE_RCU */ -static inline int rcu_is_cpu_idle(void) -{ -	return 0; -} -#endif /* else !CONFIG_PROVE_RCU */ -  static inline void rcu_lock_acquire(struct lockdep_map *map)  {  	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); @@ -431,8 +427,7 @@ extern int rcu_my_thread_group_empty(void);  static inline void rcu_preempt_sleep_check(void)  {  	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), -			   "Illegal context switch in RCU read-side " -			   "critical section"); +			   "Illegal context switch in RCU read-side critical section");  }  #else /* #ifdef CONFIG_PROVE_RCU */  static inline void rcu_preempt_sleep_check(void) @@ -513,10 +508,10 @@ static inline void rcu_preempt_sleep_check(void)  		(_________p1); \  	})  #define __rcu_assign_pointer(p, v, space) \ -	({ \ +	do { \  		smp_wmb(); \  		(p) = (typeof(*v) __force space *)(v); \ -	}) +	} while (0)  /** @@ -851,7 +846,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)   *   * Assigns the specified value to the specified RCU-protected   * pointer, ensuring that any concurrent RCU readers will see - * any prior initialization.  Returns the value assigned. + * any prior initialization.   *   * Inserts memory barriers on architectures that require them   * (which is most of them), and also prevents the compiler from @@ -903,25 +898,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)   * the reader-accessible portions of the linked structure.   
*/  #define RCU_INIT_POINTER(p, v) \ -		p = (typeof(*v) __force __rcu *)(v) - -static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) -{ -	return offset < 4096; -} - -static __always_inline -void __kfree_rcu(struct rcu_head *head, unsigned long offset) -{ -	typedef void (*rcu_callback)(struct rcu_head *); - -	BUILD_BUG_ON(!__builtin_constant_p(offset)); - -	/* See the kfree_rcu() header comment. */ -	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); +	do { \ +		p = (typeof(*v) __force __rcu *)(v); \ +	} while (0) -	kfree_call_rcu(head, (rcu_callback)offset); -} +/** + * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * + * GCC-style initialization for an RCU-protected pointer in a structure field. + */ +#define RCU_POINTER_INITIALIZER(p, v) \ +		.p = (typeof(*v) __force __rcu *)(v)  /*   * Does the specified offset indicate that the corresponding rcu_head @@ -935,7 +922,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)  #define __kfree_rcu(head, offset) \  	do { \  		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ -		call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ +		kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \  	} while (0)  /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a1f493e0fe..64d9df5c3a4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1581,7 +1581,6 @@ struct task_struct {  #endif  #ifdef CONFIG_UPROBES  	struct uprobe_task *utask; -	int uprobe_srcu_id;  #endif  }; diff --git a/include/linux/smp.h b/include/linux/smp.h index 717fb746c9a..dd6f06be3c9 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -90,10 +90,6 @@ void kick_all_cpus_sync(void);  void __init call_function_init(void);  void generic_smp_call_function_single_interrupt(void);  void generic_smp_call_function_interrupt(void); -void ipi_call_lock(void); -void ipi_call_unlock(void); -void ipi_call_lock_irq(void); -void ipi_call_unlock_irq(void);  #else  static inline void call_function_init(void) { }  #endif @@ -181,7 +177,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)  	} while (0)  static inline void smp_send_reschedule(int cpu) { } -#define num_booting_cpus()			1  #define smp_prepare_boot_cpu()			do {} while (0)  #define smp_call_function_many(mask, func, info, wait) \  			(up_smp_call_function(func, info)) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index bd96ecd0e05..802de56c41e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -153,7 +153,7 @@ static inline void tracepoint_synchronize_unregister(void)  	}								\  	static inline void trace_##name##_rcuidle(proto)		\  	{								\ -		if (static_branch(&__tracepoint_##name.key))		\ +		if (static_key_false(&__tracepoint_##name.key))		\  			__DO_TRACE(&__tracepoint_##name,		\  				TP_PROTO(data_proto),			\  				TP_ARGS(data_args),			\ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d6146b4811c..95374d1696a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)  	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);  	if (!ct || !nf_ct_is_untracked(ct)) { -		nf_reset(skb); +		nf_conntrack_put(skb->nfct);  		skb->nfct = &nf_ct_untracked_get()->ct_general;  		skb->nfctinfo = IP_CT_NEW;  		nf_conntrack_get(skb->nfct); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 
a88fb693938..e1ce1048fe5 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)  	struct net *net = nf_ct_net(ct);  	struct nf_conntrack_ecache *e; -	if (net->ct.nf_conntrack_event_cb == NULL) +	if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))  		return;  	e = nf_ct_ecache_find(ct); diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d274734b2aa..5bde94d8585 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read,  		  __entry->rcutorturename, __entry->rhp)  ); +/* + * Tracepoint for _rcu_barrier() execution.  The string "s" describes + * the _rcu_barrier phase: + *	"Begin": rcu_barrier_callback() started. + *	"Check": rcu_barrier_callback() checking for piggybacking. + *	"EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. + *	"Inc1": rcu_barrier_callback() piggyback check counter incremented. + *	"Offline": rcu_barrier_callback() found offline CPU + *	"OnlineQ": rcu_barrier_callback() found online CPU with callbacks. + *	"OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU. + *	"CB": An rcu_barrier_callback() invoked a callback, not the last. + *	"LastCB": An rcu_barrier_callback() invoked the last callback. + *	"Inc2": rcu_barrier_callback() piggyback check counter incremented. + * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument + * is the count of remaining callbacks, and "done" is the piggybacking count. + */ +TRACE_EVENT(rcu_barrier, + +	TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done), + +	TP_ARGS(rcuname, s, cpu, cnt, done), + +	TP_STRUCT__entry( +		__field(char *, rcuname) +		__field(char *, s) +		__field(int, cpu) +		__field(int, cnt) +		__field(unsigned long, done) +	), + +	TP_fast_assign( +		__entry->rcuname = rcuname; +		__entry->s = s; +		__entry->cpu = cpu; +		__entry->cnt = cnt; +		__entry->done = done; +	), + +	TP_printk("%s %s cpu %d remaining %d # %lu", +		  __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt, +		  __entry->done) +); +  #else /* #ifdef CONFIG_RCU_TRACE */  #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read,  #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \  	do { } while (0)  #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)  #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 769724944fc..c6bc2faaf26 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -571,6 +571,7 @@ static inline void ftrace_test_probe_##call(void)			\  #undef __print_flags  #undef __print_symbolic +#undef __print_hex  #undef __get_dynamic_array  #undef __get_str diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 67b847dfa2b..1f91413edb8 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -14,6 +14,7 @@  #include <linux/ctype.h>  #include <linux/string.h>  #include <linux/kernel.h> +#include <linux/kmsg_dump.h>  #include <linux/reboot.h>  #include <linux/sched.h>  #include <linux/sysrq.h> @@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv)   */  static int kdb_dmesg(int argc, 
const char **argv)  { -	char *syslog_data[4], *start, *end, c = '\0', *p; -	int diag, logging, logsize, lines = 0, adjust = 0, n; +	int diag; +	int logging; +	int lines = 0; +	int adjust = 0; +	int n = 0; +	int skip = 0; +	struct kmsg_dumper dumper = { .active = 1 }; +	size_t len; +	char buf[201];  	if (argc > 2)  		return KDB_ARGCOUNT; @@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv)  		kdb_set(2, setargs);  	} -	/* syslog_data[0,1] physical start, end+1.  syslog_data[2,3] -	 * logical start, end+1. */ -	kdb_syslog_data(syslog_data); -	if (syslog_data[2] == syslog_data[3]) -		return 0; -	logsize = syslog_data[1] - syslog_data[0]; -	start = syslog_data[2]; -	end = syslog_data[3]; -#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0]) -	for (n = 0, p = start; p < end; ++p) { -		c = *KDB_WRAP(p); -		if (c == '\n') -			++n; -	} -	if (c != '\n') -		++n; +	kmsg_dump_rewind_nolock(&dumper); +	while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) +		n++; +  	if (lines < 0) {  		if (adjust >= n)  			kdb_printf("buffer only contains %d lines, nothing " @@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv)  		else if (adjust - lines >= n)  			kdb_printf("buffer only contains %d lines, last %d "  				   "lines printed\n", n, n - adjust); -		if (adjust) { -			for (; start < end && adjust; ++start) { -				if (*KDB_WRAP(start) == '\n') -					--adjust; -			} -			if (start < end) -				++start; -		} -		for (p = start; p < end && lines; ++p) { -			if (*KDB_WRAP(p) == '\n') -				++lines; -		} -		end = p; +		skip = adjust; +		lines = abs(lines);  	} else if (lines > 0) { -		int skip = n - (adjust + lines); +		skip = n - lines - adjust; +		lines = abs(lines);  		if (adjust >= n) {  			kdb_printf("buffer only contains %d lines, "  				   "nothing printed\n", n); @@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv)  			kdb_printf("buffer only contains %d lines, first "  				   "%d lines printed\n", n, lines);  		} -		for (; start < end && skip; ++start) { -			if (*KDB_WRAP(start) == '\n') -				--skip; -		} -		for (p = start; p < end && lines; ++p) { -			if (*KDB_WRAP(p) == '\n') -				--lines; -		} -		end = p; +	} else { +		lines = n;  	} -	/* Do a line at a time (max 200 chars) to reduce protocol overhead */ -	c = '\n'; -	while (start != end) { -		char buf[201]; -		p = buf; -		if (KDB_FLAG(CMD_INTERRUPT)) -			return 0; -		while (start < end && (c = *KDB_WRAP(start)) && -		       (p - buf) < sizeof(buf)-1) { -			++start; -			*p++ = c; -			if (c == '\n') -				break; + +	if (skip >= n || skip < 0) +		return 0; + +	kmsg_dump_rewind_nolock(&dumper); +	while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { +		if (skip) { +			skip--; +			continue;  		} -		*p = '\0'; -		kdb_printf("%s", buf); +		if (!lines--) +			break; + +		kdb_printf("%.*s\n", (int)len - 1, buf);  	} -	if (c != '\n') -		kdb_printf("\n");  	return 0;  } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 47c4e56e513..392ec6a2584 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -205,7 +205,6 @@ extern char kdb_grep_string[];  extern int kdb_grep_leading;  extern int kdb_grep_trailing;  extern char *kdb_cmds[]; -extern void kdb_syslog_data(char *syslog_data[]);  extern unsigned long kdb_task_state_string(const char *);  extern char kdb_task_state_char (const struct task_struct *);  extern unsigned long kdb_task_state(const struct task_struct *p, diff --git a/kernel/events/core.c 
b/kernel/events/core.c index d7d71d6ec97..f1cf0edeb39 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx,  	lockdep_assert_held(&ctx->mutex);  	event->ctx = ctx; +	if (event->cpu != -1) +		event->cpu = cpu;  	if (!task) {  		/* @@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open,  		}  	} +	get_online_cpus(); +  	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,  				 NULL, NULL);  	if (IS_ERR(event)) { @@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open,  	/*  	 * Get the target context (task or percpu):  	 */ -	ctx = find_get_context(pmu, task, cpu); +	ctx = find_get_context(pmu, task, event->cpu);  	if (IS_ERR(ctx)) {  		err = PTR_ERR(ctx);  		goto err_alloc; @@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open,  	mutex_lock(&ctx->mutex);  	if (move_group) { -		perf_install_in_context(ctx, group_leader, cpu); +		synchronize_rcu(); +		perf_install_in_context(ctx, group_leader, event->cpu);  		get_ctx(ctx);  		list_for_each_entry(sibling, &group_leader->sibling_list,  				    group_entry) { -			perf_install_in_context(ctx, sibling, cpu); +			perf_install_in_context(ctx, sibling, event->cpu);  			get_ctx(ctx);  		}  	} -	perf_install_in_context(ctx, event, cpu); +	perf_install_in_context(ctx, event, event->cpu);  	++ctx->generation;  	perf_unpin_context(ctx);  	mutex_unlock(&ctx->mutex); +	put_online_cpus(); +  	event->owner = current;  	mutex_lock(¤t->perf_event_mutex); @@ -6419,6 +6426,7 @@ err_context:  err_alloc:  	free_event(event);  err_task: +	put_online_cpus();  	if (task)  		put_task_struct(task);  err_group_fd: @@ -6479,6 +6487,39 @@ err:  }  EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); +void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) +{ +	struct perf_event_context *src_ctx; +	struct perf_event_context *dst_ctx; +	struct perf_event *event, *tmp; +	LIST_HEAD(events); + +	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; +	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; + +	mutex_lock(&src_ctx->mutex); +	list_for_each_entry_safe(event, tmp, &src_ctx->event_list, +				 event_entry) { +		perf_remove_from_context(event); +		put_ctx(src_ctx); +		list_add(&event->event_entry, &events); +	} +	mutex_unlock(&src_ctx->mutex); + +	synchronize_rcu(); + +	mutex_lock(&dst_ctx->mutex); +	list_for_each_entry_safe(event, tmp, &events, event_entry) { +		list_del(&event->event_entry); +		if (event->state >= PERF_EVENT_STATE_OFF) +			event->state = PERF_EVENT_STATE_INACTIVE; +		perf_install_in_context(dst_ctx, event, dst_cpu); +		get_ctx(dst_ctx); +	} +	mutex_unlock(&dst_ctx->mutex); +} +EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); +  static void sync_child_event(struct perf_event *child_event,  			       struct task_struct *child)  { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 985be4d80fe..f93532748bc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -38,13 +38,29 @@  #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)  #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE -static struct srcu_struct uprobes_srcu;  static struct rb_root uprobes_tree = RB_ROOT;  static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */  #define UPROBES_HASH_SZ	13 +/* + * We need separate register/unregister and mmap/munmap lock hashes because + * of mmap_sem nesting. 
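
perf_pmu_migrate_context(), added in the events/core.c hunk above, removes every event from the source CPU's context, waits out a grace period, and reinstalls them on the destination. A hedged sketch of the kind of call site it seems aimed at, e.g. a per-socket ("uncore"-style) PMU whose events must survive the owning CPU going offline; the callback itself is hypothetical:

	/* Hypothetical hotplug callback: hand the PMU context to another
	 * online CPU before this one disappears. */
	static void my_uncore_cpu_dying(struct pmu *pmu, int cpu)
	{
		int target = cpumask_any_but(cpu_online_mask, cpu);

		if (target < nr_cpu_ids)
			perf_pmu_migrate_context(pmu, cpu, target);
	}
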
+ * + * uprobe_register() needs to install probes on (potentially) all processes + * and thus needs to acquire multiple mmap_sems (consequtively, not + * concurrently), whereas uprobe_mmap() is called while holding mmap_sem + * for the particular process doing the mmap. + * + * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem + * because of lock order against i_mmap_mutex. This means there's a hole in + * the register vma iteration where a mmap() can happen. + * + * Thus uprobe_register() can race with uprobe_mmap() and we can try and + * install a probe where one is already installed. + */ +  /* serialize (un)register */  static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; @@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];   */  static atomic_t uprobe_events = ATOMIC_INIT(0); -/* - * Maintain a temporary per vma info that can be used to search if a vma - * has already been handled. This structure is introduced since extending - * vm_area_struct wasnt recommended. - */ -struct vma_info { -	struct list_head	probe_list; -	struct mm_struct	*mm; -	loff_t			vaddr; -}; -  struct uprobe {  	struct rb_node		rb_node;	/* node in the rb tree */  	atomic_t		ref; @@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)  	if (!is_register)  		return true; -	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) +	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) +				== (VM_READ|VM_EXEC))  		return true;  	return false; @@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)  static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)  {  	struct mm_struct *mm = vma->vm_mm; -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; -	pte_t *ptep; -	spinlock_t *ptl;  	unsigned long addr; -	int err = -EFAULT; +	spinlock_t *ptl; +	pte_t *ptep;  	addr = page_address_in_vma(page, vma);  	if (addr == -EFAULT) -		goto out; - -	pgd = pgd_offset(mm, addr); -	if (!pgd_present(*pgd)) -		goto out; - -	pud = pud_offset(pgd, addr); -	if (!pud_present(*pud)) -		goto out; - -	pmd = pmd_offset(pud, addr); -	if (!pmd_present(*pmd)) -		goto out; +		return -EFAULT; -	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); +	ptep = page_check_address(page, mm, addr, &ptl, 0);  	if (!ptep) -		goto out; +		return -EAGAIN;  	get_page(kpage);  	page_add_new_anon_rmap(kpage, vma, addr); @@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct  		try_to_free_swap(page);  	put_page(page);  	pte_unmap_unlock(ptep, ptl); -	err = 0; -out: -	return err; +	return 0;  }  /** @@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,  	void *vaddr_old, *vaddr_new;  	struct vm_area_struct *vma;  	struct uprobe *uprobe; -	loff_t addr;  	int ret; - +retry:  	/* Read the page with vaddr into memory */  	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);  	if (ret <= 0) @@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,  	if (mapping != vma->vm_file->f_mapping)  		goto put_out; -	addr = vma_address(vma, uprobe->offset); -	if (vaddr != (unsigned long)addr) -		goto put_out; -  	ret = -ENOMEM;  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);  	if (!new_page) @@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,  	vaddr_new = kmap_atomic(new_page);  	memcpy(vaddr_new, vaddr_old, 
PAGE_SIZE); - -	/* poke the new insn in, ASSUMES we don't cross page boundary */ -	vaddr &= ~PAGE_MASK; -	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); -	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); +	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);  	kunmap_atomic(vaddr_new);  	kunmap_atomic(vaddr_old); @@ -291,6 +270,8 @@ unlock_out:  put_out:  	put_page(old_page); +	if (unlikely(ret == -EAGAIN)) +		goto retry;  	return ret;  } @@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_  	void *vaddr_new;  	int ret; -	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL); +	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);  	if (ret <= 0)  		return ret; @@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)  	uprobe_opcode_t opcode;  	int result; +	if (current->mm == mm) { +		pagefault_disable(); +		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, +								sizeof(opcode)); +		pagefault_enable(); + +		if (likely(result == 0)) +			goto out; +	} +  	result = read_opcode(mm, vaddr, &opcode);  	if (result)  		return result; - +out:  	if (is_swbp_insn(&opcode))  		return 1; @@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)  int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)  {  	int result; - +	/* +	 * See the comment near uprobes_hash(). +	 */  	result = is_swbp_at_addr(mm, vaddr);  	if (result == 1)  		return -EEXIST; @@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)  	uprobe->inode = igrab(inode);  	uprobe->offset = offset;  	init_rwsem(&uprobe->consumer_rwsem); -	INIT_LIST_HEAD(&uprobe->pending_list);  	/* add to uprobes_tree, sorted on inode:offset */  	cur_uprobe = insert_uprobe(uprobe); @@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)  }  static int -__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, -			unsigned long nbytes, unsigned long offset) +__copy_insn(struct address_space *mapping, struct file *filp, char *insn, +			unsigned long nbytes, loff_t offset)  { -	struct file *filp = vma->vm_file;  	struct page *page;  	void *vaddr; -	unsigned long off1; -	unsigned long idx; +	unsigned long off; +	pgoff_t idx;  	if (!filp)  		return -EINVAL; -	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); -	off1 = offset &= ~PAGE_MASK; +	if (!mapping->a_ops->readpage) +		return -EIO; + +	idx = offset >> PAGE_CACHE_SHIFT; +	off = offset & ~PAGE_MASK;  	/*  	 * Ensure that the page that has the original instruction is @@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins  		return PTR_ERR(page);  	vaddr = kmap_atomic(page); -	memcpy(insn, vaddr + off1, nbytes); +	memcpy(insn, vaddr + off, nbytes);  	kunmap_atomic(vaddr);  	page_cache_release(page);  	return 0;  } -static int -copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) +static int copy_insn(struct uprobe *uprobe, struct file *filp)  {  	struct address_space *mapping;  	unsigned long nbytes;  	int bytes; -	addr &= ~PAGE_MASK; -	nbytes = PAGE_SIZE - addr; +	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);  	mapping = uprobe->inode->i_mapping;  	/* Instruction at end of binary; copy only available bytes */ @@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)  	/* 
Instruction at the page-boundary; copy bytes in second page */  	if (nbytes < bytes) { -		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, -				bytes - nbytes, uprobe->offset + nbytes)) -			return -ENOMEM; - +		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, +				bytes - nbytes, uprobe->offset + nbytes); +		if (err) +			return err;  		bytes = nbytes;  	} -	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); +	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);  }  /* @@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)   */  static int  install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, -			struct vm_area_struct *vma, loff_t vaddr) +			struct vm_area_struct *vma, unsigned long vaddr)  { -	unsigned long addr;  	int ret;  	/* @@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  	if (!uprobe->consumers)  		return -EEXIST; -	addr = (unsigned long)vaddr; -  	if (!(uprobe->flags & UPROBE_COPY_INSN)) { -		ret = copy_insn(uprobe, vma, addr); +		ret = copy_insn(uprobe, vma->vm_file);  		if (ret)  			return ret;  		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) -			return -EEXIST; +			return -ENOTSUPP; -		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm); +		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);  		if (ret)  			return ret; +		/* write_opcode() assumes we don't cross page boundary */ +		BUG_ON((uprobe->offset & ~PAGE_MASK) + +				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); +  		uprobe->flags |= UPROBE_COPY_INSN;  	} @@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  	 * Hence increment before and decrement on failure.  	 */  	atomic_inc(&mm->uprobes_state.count); -	ret = set_swbp(&uprobe->arch, mm, addr); +	ret = set_swbp(&uprobe->arch, mm, vaddr);  	if (ret)  		atomic_dec(&mm->uprobes_state.count); @@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  }  static void -remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) +remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)  { -	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) +	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))  		atomic_dec(&mm->uprobes_state.count);  }  /* - * There could be threads that have hit the breakpoint and are entering the - * notifier code and trying to acquire the uprobes_treelock. The thread - * calling delete_uprobe() that is removing the uprobe from the rb_tree can - * race with these threads and might acquire the uprobes_treelock compared - * to some of the breakpoint hit threads. In such a case, the breakpoint - * hit threads will not find the uprobe. The current unregistering thread - * waits till all other threads have hit a breakpoint, to acquire the - * uprobes_treelock before the uprobe is removed from the rbtree. + * There could be threads that have already hit the breakpoint. They + * will recheck the current insn and restart if find_uprobe() fails. + * See find_active_uprobe().   
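
install_breakpoint() above now BUG()s if the probed instruction would straddle a page, since write_opcode() patches exactly one page. The offset arithmetic in standalone form (made-up numbers, userspace macros standing in for the kernel's):

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	/* Mirrors BUG_ON((offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE):
	 * the within-page offset plus the instruction length must fit the page. */
	static int crosses_page(unsigned long offset, unsigned long insn_size)
	{
		return (offset & ~PAGE_MASK) + insn_size > PAGE_SIZE;
	}

	int main(void)
	{
		printf("%d\n", crosses_page(0xfff, 2));	/* 1: straddles the boundary */
		printf("%d\n", crosses_page(0xffe, 2));	/* 0: fits in one page */
		return 0;
	}
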
*/  static void delete_uprobe(struct uprobe *uprobe)  {  	unsigned long flags; -	synchronize_srcu(&uprobes_srcu);  	spin_lock_irqsave(&uprobes_treelock, flags);  	rb_erase(&uprobe->rb_node, &uprobes_tree);  	spin_unlock_irqrestore(&uprobes_treelock, flags); @@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe)  	atomic_dec(&uprobe_events);  } -static struct vma_info * -__find_next_vma_info(struct address_space *mapping, struct list_head *head, -			struct vma_info *vi, loff_t offset, bool is_register) +struct map_info { +	struct map_info *next; +	struct mm_struct *mm; +	unsigned long vaddr; +}; + +static inline struct map_info *free_map_info(struct map_info *info) +{ +	struct map_info *next = info->next; +	kfree(info); +	return next; +} + +static struct map_info * +build_map_info(struct address_space *mapping, loff_t offset, bool is_register)  { +	unsigned long pgoff = offset >> PAGE_SHIFT;  	struct prio_tree_iter iter;  	struct vm_area_struct *vma; -	struct vma_info *tmpvi; -	unsigned long pgoff; -	int existing_vma; -	loff_t vaddr; - -	pgoff = offset >> PAGE_SHIFT; +	struct map_info *curr = NULL; +	struct map_info *prev = NULL; +	struct map_info *info; +	int more = 0; + again: +	mutex_lock(&mapping->i_mmap_mutex);  	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {  		if (!valid_vma(vma, is_register))  			continue; -		existing_vma = 0; -		vaddr = vma_address(vma, offset); - -		list_for_each_entry(tmpvi, head, probe_list) { -			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { -				existing_vma = 1; -				break; -			} +		if (!prev && !more) { +			/* +			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through +			 * reclaim. This is optimistic, no harm done if it fails. +			 */ +			prev = kmalloc(sizeof(struct map_info), +					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); +			if (prev) +				prev->next = NULL;  		} - -		/* -		 * Another vma needs a probe to be installed. However skip -		 * installing the probe if the vma is about to be unlinked. -		 */ -		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { -			vi->mm = vma->vm_mm; -			vi->vaddr = vaddr; -			list_add(&vi->probe_list, head); - -			return vi; +		if (!prev) { +			more++; +			continue;  		} -	} - -	return NULL; -} -/* - * Iterate in the rmap prio tree  and find a vma where a probe has not - * yet been inserted. 
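
The new free_map_info() above returns the successor node, so callers can consume and free a list in a single loop head. The same idiom, self-contained (toy payload, userspace allocation):

	#include <stdio.h>
	#include <stdlib.h>

	struct map_info { struct map_info *next; int payload; };

	static struct map_info *free_map_info(struct map_info *info)
	{
		struct map_info *next = info->next;
		free(info);
		return next;		/* caller advances via the return value */
	}

	int main(void)
	{
		struct map_info *head = NULL;
		for (int i = 0; i < 3; i++) {		/* build a small list */
			struct map_info *n = malloc(sizeof(*n));
			n->payload = i;
			n->next = head;
			head = n;
		}
		while (head) {				/* walk and free in one loop */
			printf("visit %d\n", head->payload);
			head = free_map_info(head);
		}
		return 0;
	}
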
- */ -static struct vma_info * -find_next_vma_info(struct address_space *mapping, struct list_head *head, -		loff_t offset, bool is_register) -{ -	struct vma_info *vi, *retvi; +		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) +			continue; -	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); -	if (!vi) -		return ERR_PTR(-ENOMEM); +		info = prev; +		prev = prev->next; +		info->next = curr; +		curr = info; -	mutex_lock(&mapping->i_mmap_mutex); -	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); +		info->mm = vma->vm_mm; +		info->vaddr = vma_address(vma, offset); +	}  	mutex_unlock(&mapping->i_mmap_mutex); -	if (!retvi) -		kfree(vi); +	if (!more) +		goto out; -	return retvi; +	prev = curr; +	while (curr) { +		mmput(curr->mm); +		curr = curr->next; +	} + +	do { +		info = kmalloc(sizeof(struct map_info), GFP_KERNEL); +		if (!info) { +			curr = ERR_PTR(-ENOMEM); +			goto out; +		} +		info->next = prev; +		prev = info; +	} while (--more); + +	goto again; + out: +	while (prev) +		prev = free_map_info(prev); +	return curr;  }  static int register_for_each_vma(struct uprobe *uprobe, bool is_register)  { -	struct list_head try_list; -	struct vm_area_struct *vma; -	struct address_space *mapping; -	struct vma_info *vi, *tmpvi; -	struct mm_struct *mm; -	loff_t vaddr; -	int ret; +	struct map_info *info; +	int err = 0; -	mapping = uprobe->inode->i_mapping; -	INIT_LIST_HEAD(&try_list); +	info = build_map_info(uprobe->inode->i_mapping, +					uprobe->offset, is_register); +	if (IS_ERR(info)) +		return PTR_ERR(info); -	ret = 0; +	while (info) { +		struct mm_struct *mm = info->mm; +		struct vm_area_struct *vma; -	for (;;) { -		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); -		if (!vi) -			break; +		if (err) +			goto free; -		if (IS_ERR(vi)) { -			ret = PTR_ERR(vi); -			break; -		} +		down_write(&mm->mmap_sem); +		vma = find_vma(mm, (unsigned long)info->vaddr); +		if (!vma || !valid_vma(vma, is_register)) +			goto unlock; -		mm = vi->mm; -		down_read(&mm->mmap_sem); -		vma = find_vma(mm, (unsigned long)vi->vaddr); -		if (!vma || !valid_vma(vma, is_register)) { -			list_del(&vi->probe_list); -			kfree(vi); -			up_read(&mm->mmap_sem); -			mmput(mm); -			continue; -		} -		vaddr = vma_address(vma, uprobe->offset);  		if (vma->vm_file->f_mapping->host != uprobe->inode || -						vaddr != vi->vaddr) { -			list_del(&vi->probe_list); -			kfree(vi); -			up_read(&mm->mmap_sem); -			mmput(mm); -			continue; -		} - -		if (is_register) -			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); -		else -			remove_breakpoint(uprobe, mm, vi->vaddr); +		    vma_address(vma, uprobe->offset) != info->vaddr) +			goto unlock; -		up_read(&mm->mmap_sem); -		mmput(mm);  		if (is_register) { -			if (ret && ret == -EEXIST) -				ret = 0; -			if (ret) -				break; +			err = install_breakpoint(uprobe, mm, vma, info->vaddr); +			/* +			 * We can race against uprobe_mmap(), see the +			 * comment near uprobe_hash(). 
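
build_map_info() above cannot sleep in kmalloc() while holding i_mmap_mutex, so it allocates GFP_NOWAIT under the lock, merely counts the failures, then drops the lock, preallocates the shortfall with GFP_KERNEL, and retries, recycling what it already built as the pool. A userspace toy of that two-phase shape (everything here is illustrative; a depleting budget stands in for GFP_NOWAIT failure):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct node { int item; struct node *next; };

	static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
	static int nowait_budget = 2;	/* pretend the no-sleep allocator succeeds twice */

	static struct node *alloc_nowait(void)
	{
		return nowait_budget-- > 0 ? malloc(sizeof(struct node)) : NULL;
	}

	int main(void)
	{
		struct node *pool = NULL, *out = NULL, *n;
		int more, i;
	again:
		more = 0;
		pthread_mutex_lock(&tree_lock);
		for (i = 0; i < 5; i++) {		/* stands in for the vma walk */
			if (pool) {
				n = pool;
				pool = pool->next;
			} else if (!(n = alloc_nowait())) {
				more++;			/* no sleeping under the lock: just count */
				continue;
			}
			n->item = i;
			n->next = out;
			out = n;
		}
		pthread_mutex_unlock(&tree_lock);
		if (more) {
			while (out) {			/* recycle this round's nodes as the pool */
				n = out->next;
				out->next = pool;
				pool = out;
				out = n;
			}
			while (more--) {		/* sleepable context: top the pool up */
				n = malloc(sizeof(struct node));
				n->next = pool;
				pool = n;
			}
			goto again;
		}
		for (n = out; n; n = n->next)
			printf("collected %d\n", n->item);
		return 0;
	}
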
+			 */ +			if (err == -EEXIST) +				err = 0; +		} else { +			remove_breakpoint(uprobe, mm, info->vaddr);  		} + unlock: +		up_write(&mm->mmap_sem); + free: +		mmput(mm); +		info = free_map_info(info);  	} -	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { -		list_del(&vi->probe_list); -		kfree(vi); -	} - -	return ret; +	return err;  }  static int __uprobe_register(struct uprobe *uprobe) @@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)  int uprobe_mmap(struct vm_area_struct *vma)  {  	struct list_head tmp_list; -	struct uprobe *uprobe, *u; +	struct uprobe *uprobe;  	struct inode *inode;  	int ret, count; @@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma)  	ret = 0;  	count = 0; -	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { -		loff_t vaddr; - -		list_del(&uprobe->pending_list); +	list_for_each_entry(uprobe, &tmp_list, pending_list) {  		if (!ret) { -			vaddr = vma_address(vma, uprobe->offset); +			loff_t vaddr = vma_address(vma, uprobe->offset);  			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {  				put_uprobe(uprobe); @@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma)  			}  			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - -			/* Ignore double add: */ +			/* +			 * We can race against uprobe_register(), see the +			 * comment near uprobe_hash(). +			 */  			if (ret == -EEXIST) {  				ret = 0; @@ -1115,7 +1097,7 @@ int uprobe_mmap(struct vm_area_struct *vma)  void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)  {  	struct list_head tmp_list; -	struct uprobe *uprobe, *u; +	struct uprobe *uprobe;  	struct inode *inode;  	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) @@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon  	mutex_lock(uprobes_mmap_hash(inode));  	build_probe_list(inode, &tmp_list); -	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { -		loff_t vaddr; - -		list_del(&uprobe->pending_list); -		vaddr = vma_address(vma, uprobe->offset); +	list_for_each_entry(uprobe, &tmp_list, pending_list) { +		loff_t vaddr = vma_address(vma, uprobe->offset);  		if (vaddr >= start && vaddr < end) {  			/* @@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t)  {  	struct uprobe_task *utask = t->utask; -	if (t->uprobe_srcu_id != -1) -		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id); -  	if (!utask)  		return; @@ -1398,7 +1374,6 @@ void uprobe_free_utask(struct task_struct *t)  void uprobe_copy_process(struct task_struct *t)  {  	t->utask = NULL; -	t->uprobe_srcu_id = -1;  }  /* @@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void)  	if (unlikely(!utask))  		return NULL; -	utask->active_uprobe = NULL;  	current->utask = utask;  	return utask;  } @@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)  	return false;  } +static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) +{ +	struct mm_struct *mm = current->mm; +	struct uprobe *uprobe = NULL; +	struct vm_area_struct *vma; + +	down_read(&mm->mmap_sem); +	vma = find_vma(mm, bp_vaddr); +	if (vma && vma->vm_start <= bp_vaddr) { +		if (valid_vma(vma, false)) { +			struct inode *inode; +			loff_t offset; + +			inode = vma->vm_file->f_mapping->host; +			offset = bp_vaddr - vma->vm_start; +			offset += (vma->vm_pgoff << PAGE_SHIFT); +			uprobe = find_uprobe(inode, offset); +		} + +		if (!uprobe) +			*is_swbp = 
is_swbp_at_addr(mm, bp_vaddr); +	} else { +		*is_swbp = -EFAULT; +	} +	up_read(&mm->mmap_sem); + +	return uprobe; +} +  /*   * Run handler and ask thread to singlestep.   * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.   */  static void handle_swbp(struct pt_regs *regs)  { -	struct vm_area_struct *vma;  	struct uprobe_task *utask;  	struct uprobe *uprobe; -	struct mm_struct *mm;  	unsigned long bp_vaddr; +	int uninitialized_var(is_swbp); -	uprobe = NULL;  	bp_vaddr = uprobe_get_swbp_addr(regs); -	mm = current->mm; -	down_read(&mm->mmap_sem); -	vma = find_vma(mm, bp_vaddr); - -	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) { -		struct inode *inode; -		loff_t offset; - -		inode = vma->vm_file->f_mapping->host; -		offset = bp_vaddr - vma->vm_start; -		offset += (vma->vm_pgoff << PAGE_SHIFT); -		uprobe = find_uprobe(inode, offset); -	} - -	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); -	current->uprobe_srcu_id = -1; -	up_read(&mm->mmap_sem); +	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);  	if (!uprobe) { -		/* No matching uprobe; signal SIGTRAP. */ -		send_sig(SIGTRAP, current, 0); +		if (is_swbp > 0) { +			/* No matching uprobe; signal SIGTRAP. */ +			send_sig(SIGTRAP, current, 0); +		} else { +			/* +			 * Either we raced with uprobe_unregister() or we can't +			 * access this memory. The latter is only possible if +			 * another thread plays with our ->mm. In both cases +			 * we can simply restart. If this vma was unmapped we +			 * can pretend this insn was not executed yet and get +			 * the (correct) SIGSEGV after restart. +			 */ +			instruction_pointer_set(regs, bp_vaddr); +		}  		return;  	} @@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)  		utask->state = UTASK_BP_HIT;  	set_thread_flag(TIF_UPROBE); -	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);  	return 1;  } @@ -1655,7 +1651,6 @@ static int __init init_uprobes(void)  		mutex_init(&uprobes_mutex[i]);  		mutex_init(&uprobes_mmap_mutex[i]);  	} -	init_srcu_struct(&uprobes_srcu);  	return register_die_notifier(&uprobe_exception_nb);  } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8b53db38a27..238025f5472 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -27,7 +27,6 @@  #include <linux/syscore_ops.h>  #include <linux/ctype.h>  #include <linux/genhd.h> -#include <scsi/scsi_scan.h>  #include "power.h" @@ -748,13 +747,6 @@ static int software_resume(void)  			async_synchronize_full();  		} -		/* -		 * We can't depend on SCSI devices being available after loading -		 * one of their modules until scsi_complete_async_scans() is -		 * called and the resume device usually is a SCSI one. -		 */ -		scsi_complete_async_scans(); -  		swsusp_resume_device = name_to_dev_t(resume_file);  		if (!swsusp_resume_device) {  			error = -ENODEV; diff --git a/kernel/power/user.c b/kernel/power/user.c index 91b0fd021a9..4ed81e74f86 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -24,7 +24,6 @@  #include <linux/console.h>  #include <linux/cpu.h>  #include <linux/freezer.h> -#include <scsi/scsi_scan.h>  #include <asm/uaccess.h> @@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)  		 * appear.  		 
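
find_active_uprobe() above translates the trapping address back to an inode offset: the distance into the vma plus the vma's starting file offset. The arithmetic stands alone (numbers made up for illustration):

	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		unsigned long vm_start  = 0x400000;	/* vma start */
		unsigned long vm_pgoff  = 0x10;		/* file page 16 */
		unsigned long bp_vaddr  = 0x400abc;	/* trapping address */
		unsigned long offset;

		offset  = bp_vaddr - vm_start;		/* distance into the vma */
		offset += vm_pgoff << PAGE_SHIFT;	/* plus the vma's file offset */
		printf("file offset = 0x%lx\n", offset); /* prints 0x10abc */
		return 0;
	}
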
*/  		wait_for_device_probe(); -		scsi_complete_async_scans();  		data->swap = -1;  		data->mode = O_WRONLY; diff --git a/kernel/printk.c b/kernel/printk.c index 177fa49357a..ac4bc9e7946 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1192,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)  	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);  } -#ifdef	CONFIG_KGDB_KDB -/* kdb dmesg command needs access to the syslog buffer.  do_syslog() - * uses locks so it cannot be used during debugging.  Just tell kdb - * where the start and end of the physical and logical logs are.  This - * is equivalent to do_syslog(3). - */ -void kdb_syslog_data(char *syslog_data[4]) -{ -	syslog_data[0] = log_buf; -	syslog_data[1] = log_buf + log_buf_len; -	syslog_data[2] = log_buf + log_first_idx; -	syslog_data[3] = log_buf + log_next_idx; -} -#endif	/* CONFIG_KGDB_KDB */ -  static bool __read_mostly ignore_loglevel;  static int __init ignore_loglevel_setup(char *str) @@ -2525,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)  }  /** - * kmsg_dump_get_line - retrieve one kmsg log line + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)   * @dumper: registered kmsg dumper   * @syslog: include the "<4>" prefixes   * @line: buffer to copy the line to @@ -2540,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason)   *   * A return value of FALSE indicates that there are no more records to   * read. + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks.   */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, -			char *line, size_t size, size_t *len) +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +			       char *line, size_t size, size_t *len)  { -	unsigned long flags;  	struct log *msg;  	size_t l = 0;  	bool ret = false; @@ -2552,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,  	if (!dumper->active)  		goto out; -	raw_spin_lock_irqsave(&logbuf_lock, flags);  	if (dumper->cur_seq < log_first_seq) {  		/* messages are gone, move to first available one */  		dumper->cur_seq = log_first_seq; @@ -2560,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,  	}  	/* last entry */ -	if (dumper->cur_seq >= log_next_seq) { -		raw_spin_unlock_irqrestore(&logbuf_lock, flags); +	if (dumper->cur_seq >= log_next_seq)  		goto out; -	}  	msg = log_from_idx(dumper->cur_idx);  	l = msg_print_text(msg, 0, syslog, line, size); @@ -2571,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,  	dumper->cur_idx = log_next(dumper->cur_idx);  	dumper->cur_seq++;  	ret = true; -	raw_spin_unlock_irqrestore(&logbuf_lock, flags);  out:  	if (len)  		*len = l;  	return ret;  } + +/** + * kmsg_dump_get_line - retrieve one kmsg log line + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. 
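
kmsg_dump_get_line() is re-introduced just below as a thin locked wrapper around the _nolock worker added above, so that kdb, which cannot take logbuf_lock while the machine is stopped, can call the worker directly. The split in miniature, as a userspace analog with a pthread mutex standing in for logbuf_lock (names illustrative):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t buf_lock = PTHREAD_MUTEX_INITIALIZER;
	static int cursor;

	static int get_line_nolock(char *out, size_t size)
	{
		if (cursor >= 3)
			return 0;		/* no more records */
		snprintf(out, size, "record %d", cursor++);
		return 1;
	}

	static int get_line(char *out, size_t size)	/* public, locked entry */
	{
		pthread_mutex_lock(&buf_lock);
		int ret = get_line_nolock(out, size);
		pthread_mutex_unlock(&buf_lock);
		return ret;
	}

	int main(void)
	{
		char line[32];
		while (get_line(line, sizeof(line)))
			puts(line);
		return 0;
	}
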
+ */ +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +			char *line, size_t size, size_t *len) +{ +	unsigned long flags; +	bool ret; + +	raw_spin_lock_irqsave(&logbuf_lock, flags); +	ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); +	raw_spin_unlock_irqrestore(&logbuf_lock, flags); + +	return ret; +}  EXPORT_SYMBOL_GPL(kmsg_dump_get_line);  /** @@ -2679,6 +2691,24 @@ out:  EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);  /** + * kmsg_dump_rewind_nolock - reset the interator (unlocked version) + * @dumper: registered kmsg dumper + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. + * + * The function is similar to kmsg_dump_rewind(), but grabs no locks. + */ +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +	dumper->cur_seq = clear_seq; +	dumper->cur_idx = clear_idx; +	dumper->next_seq = log_next_seq; +	dumper->next_idx = log_next_idx; +} + +/**   * kmsg_dump_rewind - reset the interator   * @dumper: registered kmsg dumper   * @@ -2691,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)  	unsigned long flags;  	raw_spin_lock_irqsave(&logbuf_lock, flags); -	dumper->cur_seq = clear_seq; -	dumper->cur_idx = clear_idx; -	dumper->next_seq = log_next_seq; -	dumper->next_idx = log_next_idx; +	kmsg_dump_rewind_nolock(dumper);  	raw_spin_unlock_irqrestore(&logbuf_lock, flags);  }  EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 95cba41ce1e..4e6a61b15e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -54,6 +54,50 @@  #ifdef CONFIG_PREEMPT_RCU  /* + * Preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ +	current->rcu_read_lock_nesting++; +	barrier();  /* critical section after entry code. */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ +	struct task_struct *t = current; + +	if (t->rcu_read_lock_nesting != 1) { +		--t->rcu_read_lock_nesting; +	} else { +		barrier();  /* critical section before exit code. */ +		t->rcu_read_lock_nesting = INT_MIN; +		barrier();  /* assign before ->rcu_read_unlock_special load */ +		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) +			rcu_read_unlock_special(t); +		barrier();  /* ->rcu_read_unlock_special load before assign */ +		t->rcu_read_lock_nesting = 0; +	} +#ifdef CONFIG_PROVE_LOCKING +	{ +		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); + +		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); +	} +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/*   * Check for a task exiting while in a preemptible-RCU read-side   * critical section, clean up if so.  
No need to issue warnings,   * as debug_check_no_locks_held() already does this if lockdep diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 37a5444204d..547b1fe5b05 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -172,7 +172,7 @@ void rcu_irq_enter(void)  	local_irq_restore(flags);  } -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_DEBUG_LOCK_ALLOC  /*   * Test whether RCU thinks that the current CPU is idle. @@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)  }  EXPORT_SYMBOL(rcu_is_cpu_idle); -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */  /*   * Test whether the current CPU was interrupted from idle.  Nested diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d6510..918fd1e8509 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {  	RCU_TRACE(.rcb.name = "rcu_preempt")  }; -static void rcu_read_unlock_special(struct task_struct *t);  static int rcu_preempted_readers_exp(void);  static void rcu_report_exp_done(void); @@ -351,8 +350,9 @@ static int rcu_initiate_boost(void)  			rcu_preempt_ctrlblk.boost_tasks =  				rcu_preempt_ctrlblk.gp_tasks;  		invoke_rcu_callbacks(); -	} else +	} else {  		RCU_TRACE(rcu_initiate_boost_trace()); +	}  	return 1;  } @@ -527,23 +527,11 @@ void rcu_preempt_note_context_switch(void)  }  /* - * Tiny-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ -	current->rcu_read_lock_nesting++; -	barrier();  /* needed if we ever invoke rcu_read_lock in rcutiny.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/*   * Handle special cases during rcu_read_unlock(), such as needing to   * notify RCU core processing or task having blocked during the RCU   * read-side critical section.   */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t)  {  	int empty;  	int empty_exp; @@ -627,38 +615,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)  }  /* - * Tiny-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ -	struct task_struct *t = current; - -	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ -	if (t->rcu_read_lock_nesting != 1) -		--t->rcu_read_lock_nesting; -	else { -		t->rcu_read_lock_nesting = INT_MIN; -		barrier();  /* assign before ->rcu_read_unlock_special load */ -		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) -			rcu_read_unlock_special(t); -		barrier();  /* ->rcu_read_unlock_special load before assign */ -		t->rcu_read_lock_nesting = 0; -	} -#ifdef CONFIG_PROVE_LOCKING -	{ -		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - -		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); -	} -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -/*   * Check for a quiescent state from the current CPU.  When a task blocks,   * the task is recorded in the rcu_preempt_ctrlblk structure, which is   * checked elsewhere.  This is called from the scheduling-clock interrupt. 
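
Both __rcu_read_unlock() variants above lean on ACCESS_ONCE() to pin down exactly one load or store of ->rcu_read_unlock_special and ->rcu_read_lock_nesting. The macro is just a cast through a volatile lvalue; a compilable userspace rendition (GCC __typeof__, same cast as the kernel's):

	#include <stdio.h>

	#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

	int flag;

	int main(void)
	{
		int seen = ACCESS_ONCE(flag);	/* exactly one load of flag */
		ACCESS_ONCE(flag) = seen + 1;	/* exactly one store */
		printf("%d\n", flag);
		return 0;
	}
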
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void)  		rpcp->exp_tasks = NULL;  	/* Wait for tail of ->blkd_tasks list to drain. */ -	if (!rcu_preempted_readers_exp()) +	if (!rcu_preempted_readers_exp()) {  		local_irq_restore(flags); -	else { +	} else {  		rcu_initiate_boost();  		local_irq_restore(flags);  		wait_event(sync_rcu_preempt_exp_wq, @@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);   */  int rcu_preempt_needs_cpu(void)  { -	if (!rcu_preempt_running_reader()) -		rcu_preempt_cpu_qs();  	return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;  } diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e66b34ab755..25b15033c61 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -49,8 +49,7 @@  #include <asm/byteorder.h>  MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " -	      "Josh Triplett <josh@freedesktop.org>"); +MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");  static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */  static int nfakewriters = 4;	/* # fake writer threads */ @@ -206,6 +205,7 @@ static unsigned long boost_starttime;	/* jiffies of next boost test start. */  DEFINE_MUTEX(boost_mutex);		/* protect setting boost_starttime */  					/*  and boost task create/destroy. */  static atomic_t barrier_cbs_count;	/* Barrier callbacks registered. */ +static bool barrier_phase;		/* Test phase. */  static atomic_t barrier_cbs_invoked;	/* Barrier callbacks invoked. */  static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */  static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); @@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p)  	if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {  		rp->rtort_mbtest = 0;  		rcu_torture_free(rp); -	} else +	} else {  		cur_ops->deferred_free(rp); +	}  }  static int rcu_no_completed(void) @@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)  	synchronize_srcu(&srcu_ctl);  } +static void srcu_torture_call(struct rcu_head *head, +			      void (*func)(struct rcu_head *head)) +{ +	call_srcu(&srcu_ctl, head, func); +} + +static void srcu_torture_barrier(void) +{ +	srcu_barrier(&srcu_ctl); +} +  static int srcu_torture_stats(char *page)  {  	int cnt = 0; @@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {  	.completed	= srcu_torture_completed,  	.deferred_free	= srcu_torture_deferred_free,  	.sync		= srcu_torture_synchronize, -	.call		= NULL, -	.cb_barrier	= NULL, +	.call		= srcu_torture_call, +	.cb_barrier	= srcu_torture_barrier,  	.stats		= srcu_torture_stats,  	.name		= "srcu"  }; @@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)  	do {  		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);  		udelay(rcu_random(&rand) & 0x3ff); -		cur_ops->sync(); +		if (cur_ops->cb_barrier != NULL && +		    rcu_random(&rand) % (nfakewriters * 8) == 0) +			cur_ops->cb_barrier(); +		else +			cur_ops->sync();  		rcu_stutter_wait("rcu_torture_fakewriter");  	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); @@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page)  	}  	cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);  	cnt += sprintf(&page[cnt], -		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " -		       "rtmbe: %d rtbke: %ld rtbre: %ld " -		       "rtbf: %ld rtb: %ld nt: %ld " -		       "onoff: %ld/%ld:%ld/%ld " -		       "barrier: %ld/%ld:%ld", +		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",  		       rcu_torture_current,  		      
 rcu_torture_current_version,  		       list_empty(&rcu_torture_freelist),  		       atomic_read(&n_rcu_torture_alloc),  		       atomic_read(&n_rcu_torture_alloc_fail), -		       atomic_read(&n_rcu_torture_free), +		       atomic_read(&n_rcu_torture_free)); +	cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",  		       atomic_read(&n_rcu_torture_mberror),  		       n_rcu_torture_boost_ktrerror, -		       n_rcu_torture_boost_rterror, +		       n_rcu_torture_boost_rterror); +	cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",  		       n_rcu_torture_boost_failure,  		       n_rcu_torture_boosts, -		       n_rcu_torture_timers, +		       n_rcu_torture_timers); +	cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",  		       n_online_successes,  		       n_online_attempts,  		       n_offline_successes, -		       n_offline_attempts, +		       n_offline_attempts); +	cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",  		       n_barrier_successes,  		       n_barrier_attempts,  		       n_rcu_torture_barrier_error); @@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg)  		delta = shutdown_time - jiffies_snap;  		if (verbose)  			printk(KERN_ALERT "%s" TORTURE_FLAG -			       "rcu_torture_shutdown task: %lu " -			       "jiffies remaining\n", +			       "rcu_torture_shutdown task: %lu jiffies remaining\n",  			       torture_type, delta);  		schedule_timeout_interruptible(delta);  		jiffies_snap = ACCESS_ONCE(jiffies); @@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg)  			if (cpu_down(cpu) == 0) {  				if (verbose)  					printk(KERN_ALERT "%s" TORTURE_FLAG -					       "rcu_torture_onoff task: " -					       "offlined %d\n", +					       "rcu_torture_onoff task: offlined %d\n",  					       torture_type, cpu);  				n_offline_successes++;  			} @@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg)  			if (cpu_up(cpu) == 0) {  				if (verbose)  					printk(KERN_ALERT "%s" TORTURE_FLAG -					       "rcu_torture_onoff task: " -					       "onlined %d\n", +					       "rcu_torture_onoff task: onlined %d\n",  					       torture_type, cpu);  				n_online_successes++;  			} @@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)  static int rcu_torture_barrier_cbs(void *arg)  {  	long myid = (long)arg; +	bool lastphase = 0;  	struct rcu_head rcu;  	init_rcu_head_on_stack(&rcu); @@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg)  	set_user_nice(current, 19);  	do {  		wait_event(barrier_cbs_wq[myid], -			   atomic_read(&barrier_cbs_count) == n_barrier_cbs || +			   barrier_phase != lastphase ||  			   kthread_should_stop() ||  			   fullstop != FULLSTOP_DONTSTOP); +		lastphase = barrier_phase; +		smp_mb(); /* ensure barrier_phase load before ->call(). */  		if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)  			break;  		cur_ops->call(&rcu, rcu_torture_barrier_cbf); @@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg)  	do {  		atomic_set(&barrier_cbs_invoked, 0);  		atomic_set(&barrier_cbs_count, n_barrier_cbs); -		/* wake_up() path contains the required barriers. */ +		smp_mb(); /* Ensure barrier_phase after prior assignments. 
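*/

/*
 * For illustration only: a runnable userspace sketch of the phase
 * handshake the hunk above introduces.  The coordinator flips a shared
 * phase flag instead of re-arming a counter, each poster compares it
 * against a privately cached lastphase, and memory barriers order the
 * flag against the surrounding work.  All names below are hypothetical,
 * and C11 seq_cst atomics stand in for smp_mb()/wait_event().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool phase;	/* stands in for barrier_phase */
static atomic_int acks;		/* stands in for barrier_cbs_invoked */

static void *cb_thread(void *arg)
{
	bool lastphase = false;	/* private copy, as in rcu_torture_barrier_cbs() */

	(void)arg;
	for (int round = 1; round <= 3; round++) {
		while (atomic_load(&phase) == lastphase)
			;	/* the kernel sleeps in wait_event() here */
		lastphase = atomic_load(&phase);
		atomic_fetch_add(&acks, 1);	/* "post the callback" */
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, cb_thread, NULL);
	for (int round = 1; round <= 3; round++) {
		atomic_store(&phase, round & 1);	/* coordinator flips */
		while (atomic_load(&acks) < round)
			;	/* the kernel waits on barrier_wq instead */
	}
	pthread_join(t, NULL);
	printf("three phase flips handshaked\n");
	return 0;
}

/* in the patch, the flip and the per-thread wakeups follow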
*/ +		barrier_phase = !barrier_phase;  		for (i = 0; i < n_barrier_cbs; i++)  			wake_up(&barrier_cbs_wq[i]);  		wait_event(barrier_wq, @@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg)  		schedule_timeout_interruptible(HZ / 10);  	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);  	VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); -	rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); +	rcutorture_shutdown_absorb("rcu_torture_barrier");  	while (!kthread_should_stop())  		schedule_timeout_interruptible(1);  	return 0; @@ -1908,8 +1925,8 @@ rcu_torture_init(void)  	static struct rcu_torture_ops *torture_ops[] =  		{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,  		  &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, -		  &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, -		  &srcu_raw_sync_ops, &srcu_expedited_ops, +		  &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, +		  &srcu_raw_ops, &srcu_raw_sync_ops,  		  &sched_ops, &sched_sync_ops, &sched_expedited_ops, };  	mutex_lock(&fullstop_mutex); @@ -1931,8 +1948,7 @@ rcu_torture_init(void)  		return -EINVAL;  	}  	if (cur_ops->fqs == NULL && fqs_duration != 0) { -		printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " -				  "fqs_duration, fqs disabled.\n"); +		printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");  		fqs_duration = 0;  	}  	if (cur_ops->init) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..f280e542e3e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,36 +60,44 @@  /* Data structures. */ -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(structname) { \ -	.level = { &structname##_state.node[0] }, \ -	.levelcnt = { \ -		NUM_RCU_LVL_0,  /* root of hierarchy. */ \ -		NUM_RCU_LVL_1, \ -		NUM_RCU_LVL_2, \ -		NUM_RCU_LVL_3, \ -		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ -	}, \ +#define RCU_STATE_INITIALIZER(sname, cr) { \ +	.level = { &sname##_state.node[0] }, \ +	.call = cr, \  	.fqs_state = RCU_GP_IDLE, \  	.gpnum = -300, \  	.completed = -300, \ -	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ -	.orphan_nxttail = &structname##_state.orphan_nxtlist, \ -	.orphan_donetail = &structname##_state.orphan_donelist, \ -	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ -	.n_force_qs = 0, \ -	.n_force_qs_ngp = 0, \ -	.name = #structname, \ +	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ +	.orphan_nxttail = &sname##_state.orphan_nxtlist, \ +	.orphan_donetail = &sname##_state.orphan_donelist, \ +	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ +	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ +	.name = #sname, \  } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = +	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);  DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);  DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);  static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); + +/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. 
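*/

/*
 * The declaration that follows is a boot-time tunable: module_param()
 * with permission 0 creates no sysfs entry, so the value can only be
 * set on the kernel command line.  For code built into the kernel the
 * spelling is "<modname>.<param>=N" -- presumably
 * rcutree.rcu_fanout_leaf=32 here.  A minimal sketch of the same
 * pattern, with a hypothetical parameter name:
 */
#include <linux/moduleparam.h>

static int demo_leaf = 16;		/* default, like CONFIG_RCU_FANOUT_LEAF */
module_param(demo_leaf, int, 0);	/* perm 0: boot line only */
MODULE_PARM_DESC(demo_leaf, "Demo leaf-level fanout");

/* the patch's actual boot parameter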
*/ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */ +	NUM_RCU_LVL_0, +	NUM_RCU_LVL_1, +	NUM_RCU_LVL_2, +	NUM_RCU_LVL_3, +	NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */  /*   * The rcu_scheduler_active variable transitions from zero to one just @@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);  unsigned long rcutorture_testseq;  unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; -static atomic_t rcu_barrier_cpu_count; -static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; -  /*   * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s   * permit this function to be invoked without holding the root rcu_node @@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)  		struct task_struct *idle = idle_task(smp_processor_id());  		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); -		ftrace_dump(DUMP_ALL); +		ftrace_dump(DUMP_ORIG);  		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",  			  current->pid, current->comm,  			  idle->pid, idle->comm); /* must be idle task! */ @@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)  		trace_rcu_dyntick("Error on exit: not idle task",  				  oldval, rdtp->dynticks_nesting); -		ftrace_dump(DUMP_ALL); +		ftrace_dump(DUMP_ORIG);  		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",  			  current->pid, current->comm,  			  idle->pid, idle->comm); /* must be idle task! */ @@ -585,8 +586,6 @@ void rcu_nmi_exit(void)  	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);  } -#ifdef CONFIG_PROVE_RCU -  /**   * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle   * @@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void)  }  EXPORT_SYMBOL(rcu_is_cpu_idle); -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)  /*   * Is the current CPU online?  Disable preemption to avoid false positives @@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)  }  EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */  /**   * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle @@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)  	int cpu;  	long delta;  	unsigned long flags; -	int ndetected; +	int ndetected = 0;  	struct rcu_node *rnp = rcu_get_root(rsp);  	/* Only let one CPU complain about others per time interval. 
*/ @@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)  	 */  	rnp = rcu_get_root(rsp);  	raw_spin_lock_irqsave(&rnp->lock, flags); -	ndetected = rcu_print_task_stall(rnp); +	ndetected += rcu_print_task_stall(rnp);  	raw_spin_unlock_irqrestore(&rnp->lock, flags);  	print_cpu_stall_info_end(); @@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)   */  void rcu_cpu_stall_reset(void)  { -	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; -	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; -	rcu_preempt_stall_reset(); +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;  }  static struct notifier_block rcu_panic_block = { @@ -894,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct  		if (rnp->qsmask & rdp->grpmask) {  			rdp->qs_pending = 1;  			rdp->passed_quiesce = 0; -		} else +		} else {  			rdp->qs_pending = 0; +		}  		zero_cpu_stall_ticks(rdp);  	}  } @@ -937,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)  }  /* + * Initialize the specified rcu_data structure's callback list to empty. + */ +static void init_callback_list(struct rcu_data *rdp) +{ +	int i; + +	rdp->nxtlist = NULL; +	for (i = 0; i < RCU_NEXT_SIZE; i++) +		rdp->nxttail[i] = &rdp->nxtlist; +} + +/*   * Advance this CPU's callbacks, but only if the current grace period   * has ended.  This may be called only from the CPU to whom the rdp   * belongs.  In addition, the corresponding leaf rcu_node structure's @@ -1328,8 +1339,6 @@ static void  rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,  			  struct rcu_node *rnp, struct rcu_data *rdp)  { -	int i; -  	/*  	 * Orphan the callbacks.  First adjust the counts.  This is safe  	 * because ->onofflock excludes _rcu_barrier()'s adoption of @@ -1340,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,  		rsp->qlen += rdp->qlen;  		rdp->n_cbs_orphaned += rdp->qlen;  		rdp->qlen_lazy = 0; -		rdp->qlen = 0; +		ACCESS_ONCE(rdp->qlen) = 0;  	}  	/* @@ -1369,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,  	}  	/* Finally, initialize the rcu_data structure's list to empty.  */ -	rdp->nxtlist = NULL; -	for (i = 0; i < RCU_NEXT_SIZE; i++) -		rdp->nxttail[i] = &rdp->nxtlist; +	init_callback_list(rdp);  }  /* @@ -1505,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)  		raw_spin_unlock_irqrestore(&rnp->lock, flags);  	if (need_report & RCU_OFL_TASKS_EXP_GP)  		rcu_report_exp_rnp(rsp, rnp, true); +	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, +		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", +		  cpu, rdp->qlen, rdp->nxtlist);  }  #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -1592,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)  	}  	smp_mb(); /* List handling before counting for rcu_barrier(). */  	rdp->qlen_lazy -= count_lazy; -	rdp->qlen -= count; +	ACCESS_ONCE(rdp->qlen) -= count;  	rdp->n_cbs_invoked += count;  	/* Reinstate batch limit if we have worked down the excess. 
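*/

/*
 * Several hunks above start touching ->qlen through ACCESS_ONCE().
 * The reason appears further down: _rcu_barrier() now samples ->qlen
 * from other CPUs without holding any lock, so both sides must use
 * untorn, non-refetchable accesses.  For reference, the macro of this
 * era is a volatile cast (from <linux/compiler.h>); the sampler below
 * is a hypothetical sketch of such a lockless reader.
 */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

static inline int cbs_outstanding(unsigned long *qlen)
{
	/* exactly one load; the compiler may not fuse, tear, or refetch */
	return ACCESS_ONCE(*qlen) != 0;
}

/* next hunk: warn whenever ->qlen and list emptiness disagree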
*/ @@ -1605,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)  		rdp->n_force_qs_snap = rsp->n_force_qs;  	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)  		rdp->qlen_last_fqs_check = rdp->qlen; +	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));  	local_irq_restore(flags); @@ -1745,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)  		break; /* grace period idle or initializing, ignore. */  	case RCU_SAVE_DYNTICK: -		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) -			break; /* So gcc recognizes the dead code. */  		raw_spin_unlock(&rnp->lock);  /* irqs remain disabled */ @@ -1788,9 +1797,10 @@ unlock_fqs_ret:   * whom the rdp belongs.   */  static void -__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +__rcu_process_callbacks(struct rcu_state *rsp)  {  	unsigned long flags; +	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);  	WARN_ON_ONCE(rdp->beenonline == 0); @@ -1826,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)   */  static void rcu_process_callbacks(struct softirq_action *unused)  { +	struct rcu_state *rsp; +  	trace_rcu_utilization("Start RCU core"); -	__rcu_process_callbacks(&rcu_sched_state, -				&__get_cpu_var(rcu_sched_data)); -	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); -	rcu_preempt_process_callbacks(); +	for_each_rcu_flavor(rsp) +		__rcu_process_callbacks(rsp);  	trace_rcu_utilization("End RCU core");  } @@ -1857,6 +1867,56 @@ static void invoke_rcu_core(void)  	raise_softirq(RCU_SOFTIRQ);  } +/* + * Handle any core-RCU processing required by a call_rcu() invocation. + */ +static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, +			    struct rcu_head *head, unsigned long flags) +{ +	/* +	 * If called from an extended quiescent state, invoke the RCU +	 * core in order to force a re-evaluation of RCU's idleness. +	 */ +	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) +		invoke_rcu_core(); + +	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */ +	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) +		return; + +	/* +	 * Force the grace period if too many callbacks or too long waiting. +	 * Enforce hysteresis, and don't invoke force_quiescent_state() +	 * if some other CPU has recently done so.  Also, don't bother +	 * invoking force_quiescent_state() if the newly enqueued callback +	 * is the only one waiting for a grace period to complete. +	 */ +	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { + +		/* Are we ignoring a completed grace period? */ +		rcu_process_gp_end(rsp, rdp); +		check_for_new_grace_period(rsp, rdp); + +		/* Start a new grace period if one not already started. */ +		if (!rcu_gp_in_progress(rsp)) { +			unsigned long nestflag; +			struct rcu_node *rnp_root = rcu_get_root(rsp); + +			raw_spin_lock_irqsave(&rnp_root->lock, nestflag); +			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */ +		} else { +			/* Give the grace period a kick. 
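*/

/*
 * rcu_process_callbacks() above now walks a registry of flavors rather
 * than naming rcu_sched/rcu_bh/rcu_preempt one by one; each rcu_state
 * links itself into rcu_struct_flavors (rcu_init_one() does so with
 * list_add() later in this patch).  A self-contained model of the
 * pattern -- singly linked for brevity where the kernel uses
 * struct list_head:
 */
#include <stdio.h>

struct flavor {
	const char *name;
	struct flavor *next;
};

static struct flavor bh_flavor = { "rcu_bh", NULL };
static struct flavor sched_flavor = { "rcu_sched", &bh_flavor };
static struct flavor *flavors = &sched_flavor;

#define for_each_flavor(f) \
	for ((f) = flavors; (f) != NULL; (f) = (f)->next)

int main(void)
{
	struct flavor *f;

	/* one generic loop replaces the per-flavor copy-and-paste */
	for_each_flavor(f)
		printf("processing %s\n", f->name);
	return 0;
}

/* "kicking" the grace period means raising blimit and forcing QSes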
*/ +			rdp->blimit = LONG_MAX; +			if (rsp->n_force_qs == rdp->n_force_qs_snap && +			    *rdp->nxttail[RCU_DONE_TAIL] != head) +				force_quiescent_state(rsp, 0); +			rdp->n_force_qs_snap = rsp->n_force_qs; +			rdp->qlen_last_fqs_check = rdp->qlen; +		} +	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) +		force_quiescent_state(rsp, 1); +} +  static void  __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),  	   struct rcu_state *rsp, bool lazy) @@ -1881,7 +1941,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),  	rdp = this_cpu_ptr(rsp->rda);  	/* Add the callback to our list. */ -	rdp->qlen++; +	ACCESS_ONCE(rdp->qlen)++;  	if (lazy)  		rdp->qlen_lazy++;  	else @@ -1896,43 +1956,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),  	else  		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); -	/* If interrupts were disabled, don't dive into RCU core. */ -	if (irqs_disabled_flags(flags)) { -		local_irq_restore(flags); -		return; -	} - -	/* -	 * Force the grace period if too many callbacks or too long waiting. -	 * Enforce hysteresis, and don't invoke force_quiescent_state() -	 * if some other CPU has recently done so.  Also, don't bother -	 * invoking force_quiescent_state() if the newly enqueued callback -	 * is the only one waiting for a grace period to complete. -	 */ -	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { - -		/* Are we ignoring a completed grace period? */ -		rcu_process_gp_end(rsp, rdp); -		check_for_new_grace_period(rsp, rdp); - -		/* Start a new grace period if one not already started. */ -		if (!rcu_gp_in_progress(rsp)) { -			unsigned long nestflag; -			struct rcu_node *rnp_root = rcu_get_root(rsp); - -			raw_spin_lock_irqsave(&rnp_root->lock, nestflag); -			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */ -		} else { -			/* Give the grace period a kick. */ -			rdp->blimit = LONG_MAX; -			if (rsp->n_force_qs == rdp->n_force_qs_snap && -			    *rdp->nxttail[RCU_DONE_TAIL] != head) -				force_quiescent_state(rsp, 0); -			rdp->n_force_qs_snap = rsp->n_force_qs; -			rdp->qlen_last_fqs_check = rdp->qlen; -		} -	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) -		force_quiescent_state(rsp, 1); +	/* Go handle any RCU core processing required. */ +	__call_rcu_core(rsp, rdp, head, flags);  	local_irq_restore(flags);  } @@ -1962,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);   * occasionally incorrectly indicate that there are multiple CPUs online   * when there was in fact only one the whole time, as this just adds   * some overhead: RCU still operates correctly. - * - * Of course, sampling num_online_cpus() with preemption enabled can - * give erroneous results if there are concurrent CPU-hotplug operations. - * For example, given a demonic sequence of preemptions in num_online_cpus() - * and CPU-hotplug operations, there could be two or more CPUs online at - * all times, but num_online_cpus() might well return one (or even zero). - * - * However, all such demonic sequences require at least one CPU-offline - * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer - * is only a problem if there is an RCU read-side critical section executing - * throughout.  But RCU-sched and RCU-bh read-side critical sections - * disable either preemption or bh, which prevents a CPU from going offline. 
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return - * that there is only one CPU when in fact there was more than one throughout - * is when there were no RCU readers in the system.  If there are no - * RCU readers, the grace period by definition can be of zero length, - * regardless of the number of online CPUs.   */  static inline int rcu_blocking_is_gp(void)  { +	int ret; +  	might_sleep();  /* Check for RCU read-side critical section. */ -	return num_online_cpus() <= 1; +	preempt_disable(); +	ret = num_online_cpus() <= 1; +	preempt_enable(); +	return ret;  }  /** @@ -2118,9 +2131,9 @@ void synchronize_sched_expedited(void)  		put_online_cpus();  		/* No joy, try again later.  Or just synchronize_sched(). */ -		if (trycount++ < 10) +		if (trycount++ < 10) {  			udelay(trycount * num_online_cpus()); -		else { +		} else {  			synchronize_sched();  			return;  		} @@ -2241,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)   */  static int rcu_pending(int cpu)  { -	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || -	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || -	       rcu_preempt_pending(cpu); +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) +			return 1; +	return 0;  }  /* @@ -2253,20 +2269,41 @@ static int rcu_pending(int cpu)   */  static int rcu_cpu_has_callbacks(int cpu)  { +	struct rcu_state *rsp; +  	/* RCU callbacks either ready or pending? */ -	return per_cpu(rcu_sched_data, cpu).nxtlist || -	       per_cpu(rcu_bh_data, cpu).nxtlist || -	       rcu_preempt_cpu_has_callbacks(cpu); +	for_each_rcu_flavor(rsp) +		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) +			return 1; +	return 0; +} + +/* + * Helper function for _rcu_barrier() tracing.  If tracing is disabled, + * the compiler is expected to optimize this away. + */ +static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, +			       int cpu, unsigned long done) +{ +	trace_rcu_barrier(rsp->name, s, cpu, +			  atomic_read(&rsp->barrier_cpu_count), done);  }  /*   * RCU callback function for _rcu_barrier().  If we are last, wake   * up the task executing _rcu_barrier().   
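*/

/*
 * The rewritten callback below no longer ignores its rcu_head argument:
 * the head is now embedded in each CPU's rcu_data (->barrier_head), so
 * container_of() recovers the rcu_data -- and through ->rsp the
 * rcu_state -- without file-scope globals.  A runnable model of that
 * recovery, with simplified, hypothetical struct layouts:
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct head { struct head *next; };
struct data { int cpu; struct head barrier_head; };

static void barrier_cb(struct head *hp)
{
	struct data *dp = container_of(hp, struct data, barrier_head);

	printf("barrier callback for CPU %d\n", dp->cpu);
}

int main(void)
{
	struct data d = { .cpu = 3 };

	barrier_cb(&d.barrier_head);	/* recovers &d from the embedded head */
	return 0;
}

/* the global-based version goes, the container_of() version comes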
*/ -static void rcu_barrier_callback(struct rcu_head *notused) +static void rcu_barrier_callback(struct rcu_head *rhp)  { -	if (atomic_dec_and_test(&rcu_barrier_cpu_count)) -		complete(&rcu_barrier_completion); +	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); +	struct rcu_state *rsp = rdp->rsp; + +	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { +		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); +		complete(&rsp->barrier_completion); +	} else { +		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); +	}  }  /* @@ -2274,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)   */  static void rcu_barrier_func(void *type)  { -	int cpu = smp_processor_id(); -	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); -	void (*call_rcu_func)(struct rcu_head *head, -			      void (*func)(struct rcu_head *head)); +	struct rcu_state *rsp = type; +	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); -	atomic_inc(&rcu_barrier_cpu_count); -	call_rcu_func = type; -	call_rcu_func(head, rcu_barrier_callback); +	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); +	atomic_inc(&rsp->barrier_cpu_count); +	rsp->call(&rdp->barrier_head, rcu_barrier_callback);  }  /*   * Orchestrate the specified type of RCU barrier, waiting for all   * RCU callbacks of the specified type to complete.   */ -static void _rcu_barrier(struct rcu_state *rsp, -			 void (*call_rcu_func)(struct rcu_head *head, -					       void (*func)(struct rcu_head *head))) +static void _rcu_barrier(struct rcu_state *rsp)  {  	int cpu;  	unsigned long flags;  	struct rcu_data *rdp; -	struct rcu_head rh; +	struct rcu_data rd; +	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); +	unsigned long snap_done; -	init_rcu_head_on_stack(&rh); +	init_rcu_head_on_stack(&rd.barrier_head); +	_rcu_barrier_trace(rsp, "Begin", -1, snap);  	/* Take mutex to serialize concurrent rcu_barrier() requests. */ -	mutex_lock(&rcu_barrier_mutex); +	mutex_lock(&rsp->barrier_mutex); + +	/* +	 * Ensure that all prior references, including to ->n_barrier_done, +	 * are ordered before the _rcu_barrier() machinery. +	 */ +	smp_mb();  /* See above block comment. */ + +	/* +	 * Recheck ->n_barrier_done to see if others did our work for us. +	 * This means checking ->n_barrier_done for an even-to-odd-to-even +	 * transition.  The "if" expression below therefore rounds the old +	 * value up to the next even number and adds two before comparing. +	 */ +	snap_done = ACCESS_ONCE(rsp->n_barrier_done); +	_rcu_barrier_trace(rsp, "Check", -1, snap_done); +	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { +		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); +		smp_mb(); /* caller's subsequent code after above check. */ +		mutex_unlock(&rsp->barrier_mutex); +		return; +	} -	smp_mb();  /* Prevent any prior operations from leaking in. */ +	/* +	 * Increment ->n_barrier_done to avoid duplicate work.  Use +	 * ACCESS_ONCE() to prevent the compiler from speculating +	 * the increment to precede the early-exit check. +	 */ +	ACCESS_ONCE(rsp->n_barrier_done)++; +	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); +	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); +	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */  	/*  	 * Initialize the count to one rather than to zero in order to @@ -2321,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp,  	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening  	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!  	 
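*/

/*
 * The scenario above motivates the reference-count bias: the count
 * starts at 1 so the completion cannot fire while callbacks are still
 * being posted; _rcu_barrier() drops its own reference only when
 * posting is done.  A runnable model (hypothetical names):
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int barrier_cpu_count;

static void barrier_cb(void)	/* one invocation per posted callback */
{
	if (atomic_fetch_sub(&barrier_cpu_count, 1) == 1)
		printf("last reference: complete()\n");
}

int main(void)
{
	atomic_store(&barrier_cpu_count, 1);	/* bias: the barrier's own ref */
	for (int i = 0; i < 4; i++) {
		atomic_fetch_add(&barrier_cpu_count, 1);	/* CPU enqueues */
		barrier_cb();	/* may fire while posting continues: no effect */
	}
	barrier_cb();	/* drop the bias; only now can the count reach zero */
	return 0;
}

/* hence both the old and the new code start the count at 1, not 0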
*/ -	init_completion(&rcu_barrier_completion); -	atomic_set(&rcu_barrier_cpu_count, 1); +	init_completion(&rsp->barrier_completion); +	atomic_set(&rsp->barrier_cpu_count, 1);  	raw_spin_lock_irqsave(&rsp->onofflock, flags);  	rsp->rcu_barrier_in_progress = current;  	raw_spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -2338,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp,  		preempt_disable();  		rdp = per_cpu_ptr(rsp->rda, cpu);  		if (cpu_is_offline(cpu)) { +			_rcu_barrier_trace(rsp, "Offline", cpu, +					   rsp->n_barrier_done);  			preempt_enable();  			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))  				schedule_timeout_interruptible(1);  		} else if (ACCESS_ONCE(rdp->qlen)) { -			smp_call_function_single(cpu, rcu_barrier_func, -						 (void *)call_rcu_func, 1); +			_rcu_barrier_trace(rsp, "OnlineQ", cpu, +					   rsp->n_barrier_done); +			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);  			preempt_enable();  		} else { +			_rcu_barrier_trace(rsp, "OnlineNQ", cpu, +					   rsp->n_barrier_done);  			preempt_enable();  		}  	} @@ -2362,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp,  	rcu_adopt_orphan_cbs(rsp);  	rsp->rcu_barrier_in_progress = NULL;  	raw_spin_unlock_irqrestore(&rsp->onofflock, flags); -	atomic_inc(&rcu_barrier_cpu_count); +	atomic_inc(&rsp->barrier_cpu_count);  	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ -	call_rcu_func(&rh, rcu_barrier_callback); +	rd.rsp = rsp; +	rsp->call(&rd.barrier_head, rcu_barrier_callback);  	/*  	 * Now that we have an rcu_barrier_callback() callback on each  	 * CPU, and thus each counted, remove the initial count.  	 */ -	if (atomic_dec_and_test(&rcu_barrier_cpu_count)) -		complete(&rcu_barrier_completion); +	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) +		complete(&rsp->barrier_completion); + +	/* Increment ->n_barrier_done to prevent duplicate work. */ +	smp_mb(); /* Keep increment after above mechanism. */ +	ACCESS_ONCE(rsp->n_barrier_done)++; +	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); +	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); +	smp_mb(); /* Keep increment before caller's subsequent code. */  	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */ -	wait_for_completion(&rcu_barrier_completion); +	wait_for_completion(&rsp->barrier_completion);  	/* Other rcu_barrier() invocations can now safely proceed. */ -	mutex_unlock(&rcu_barrier_mutex); +	mutex_unlock(&rsp->barrier_mutex); -	destroy_rcu_head_on_stack(&rh); +	destroy_rcu_head_on_stack(&rd.barrier_head);  }  /** @@ -2387,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp,   */  void rcu_barrier_bh(void)  { -	_rcu_barrier(&rcu_bh_state, call_rcu_bh); +	_rcu_barrier(&rcu_bh_state);  }  EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -2396,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);   */  void rcu_barrier_sched(void)  { -	_rcu_barrier(&rcu_sched_state, call_rcu_sched); +	_rcu_barrier(&rcu_sched_state);  }  EXPORT_SYMBOL_GPL(rcu_barrier_sched); @@ -2407,18 +2485,15 @@ static void __init  rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)  {  	unsigned long flags; -	int i;  	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  	struct rcu_node *rnp = rcu_get_root(rsp);  	/* Set up local state, ensuring consistent view of global state. 
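*/

/*
 * rcu_boot_init_percpu_data() below is one of the callers folded onto
 * the new init_callback_list() helper.  The structure being reset is a
 * singly linked callback list with one tail pointer per grace-period
 * segment; an empty segment's tail points back at ->nxtlist.  A
 * compilable model that exercises only the last ("next") segment:
 */
#include <stdio.h>

#define RCU_NEXT_SIZE 4

struct cb { struct cb *next; };
struct cpu_cbs {
	struct cb *nxtlist;
	struct cb **nxttail[RCU_NEXT_SIZE];
};

static void init_callback_list(struct cpu_cbs *c)
{
	c->nxtlist = NULL;
	for (int i = 0; i < RCU_NEXT_SIZE; i++)
		c->nxttail[i] = &c->nxtlist;	/* every segment empty */
}

static void enqueue(struct cpu_cbs *c, struct cb *cbp)
{
	cbp->next = NULL;
	*c->nxttail[RCU_NEXT_SIZE - 1] = cbp;	/* append to NEXT segment */
	c->nxttail[RCU_NEXT_SIZE - 1] = &cbp->next;
}

int main(void)
{
	struct cpu_cbs c;
	struct cb a, b;

	init_callback_list(&c);
	enqueue(&c, &a);
	enqueue(&c, &b);
	printf("%s\n", c.nxtlist == &a && a.next == &b ? "ok" : "bug");
	return 0;
}

/* boot-time per-CPU setup, now using the helper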
*/  	raw_spin_lock_irqsave(&rnp->lock, flags);  	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); -	rdp->nxtlist = NULL; -	for (i = 0; i < RCU_NEXT_SIZE; i++) -		rdp->nxttail[i] = &rdp->nxtlist; +	init_callback_list(rdp);  	rdp->qlen_lazy = 0; -	rdp->qlen = 0; +	ACCESS_ONCE(rdp->qlen) = 0;  	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);  	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);  	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); @@ -2492,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)  static void __cpuinit rcu_prepare_cpu(int cpu)  { -	rcu_init_percpu_data(cpu, &rcu_sched_state, 0); -	rcu_init_percpu_data(cpu, &rcu_bh_state, 0); -	rcu_preempt_init_percpu_data(cpu); +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		rcu_init_percpu_data(cpu, rsp, +				     strcmp(rsp->name, "rcu_preempt") == 0);  }  /* @@ -2506,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,  	long cpu = (long)hcpu;  	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);  	struct rcu_node *rnp = rdp->mynode; +	struct rcu_state *rsp;  	trace_rcu_utilization("Start CPU hotplug");  	switch (action) { @@ -2530,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,  		 * touch any data without introducing corruption. We send the  		 * dying CPU's callbacks to an arbitrarily chosen online CPU.  		 */ -		rcu_cleanup_dying_cpu(&rcu_bh_state); -		rcu_cleanup_dying_cpu(&rcu_sched_state); -		rcu_preempt_cleanup_dying_cpu(); +		for_each_rcu_flavor(rsp) +			rcu_cleanup_dying_cpu(rsp);  		rcu_cleanup_after_idle(cpu);  		break;  	case CPU_DEAD:  	case CPU_DEAD_FROZEN:  	case CPU_UP_CANCELED:  	case CPU_UP_CANCELED_FROZEN: -		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); -		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); -		rcu_preempt_cleanup_dead_cpu(cpu); +		for_each_rcu_flavor(rsp) +			rcu_cleanup_dead_cpu(cpu, rsp);  		break;  	default:  		break; @@ -2574,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)  {  	int i; -	for (i = NUM_RCU_LVLS - 1; i > 0; i--) +	for (i = rcu_num_lvls - 1; i > 0; i--)  		rsp->levelspread[i] = CONFIG_RCU_FANOUT; -	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; +	rsp->levelspread[0] = rcu_fanout_leaf;  }  #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */  static void __init rcu_init_levelspread(struct rcu_state *rsp) @@ -2586,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)  	int i;  	cprv = NR_CPUS; -	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { +	for (i = rcu_num_lvls - 1; i >= 0; i--) {  		ccur = rsp->levelcnt[i];  		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;  		cprv = ccur; @@ -2613,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,  	/* Initialize the level-tracking arrays. */ -	for (i = 1; i < NUM_RCU_LVLS; i++) +	for (i = 0; i < rcu_num_lvls; i++) +		rsp->levelcnt[i] = num_rcu_lvl[i]; +	for (i = 1; i < rcu_num_lvls; i++)  		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];  	rcu_init_levelspread(rsp);  	/* Initialize the elements themselves, starting from the leaves. 
*/ -	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { +	for (i = rcu_num_lvls - 1; i >= 0; i--) {  		cpustride *= rsp->levelspread[i];  		rnp = rsp->level[i];  		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { @@ -2649,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,  	}  	rsp->rda = rda; -	rnp = rsp->level[NUM_RCU_LVLS - 1]; +	rnp = rsp->level[rcu_num_lvls - 1];  	for_each_possible_cpu(i) {  		while (i > rnp->grphi)  			rnp++;  		per_cpu_ptr(rsp->rda, i)->mynode = rnp;  		rcu_boot_init_percpu_data(i, rsp);  	} +	list_add(&rsp->flavors, &rcu_struct_flavors); +} + +/* + * Compute the rcu_node tree geometry from kernel parameters.  This cannot + * replace the definitions in rcutree.h because those are needed to size + * the ->node array in the rcu_state structure. + */ +static void __init rcu_init_geometry(void) +{ +	int i; +	int j; +	int n = nr_cpu_ids; +	int rcu_capacity[MAX_RCU_LVLS + 1]; + +	/* If the compile-time values are accurate, just leave. */ +	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) +		return; + +	/* +	 * Compute number of nodes that can be handled an rcu_node tree +	 * with the given number of levels.  Setting rcu_capacity[0] makes +	 * some of the arithmetic easier. +	 */ +	rcu_capacity[0] = 1; +	rcu_capacity[1] = rcu_fanout_leaf; +	for (i = 2; i <= MAX_RCU_LVLS; i++) +		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + +	/* +	 * The boot-time rcu_fanout_leaf parameter is only permitted +	 * to increase the leaf-level fanout, not decrease it.  Of course, +	 * the leaf-level fanout cannot exceed the number of bits in +	 * the rcu_node masks.  Finally, the tree must be able to accommodate +	 * the configured number of CPUs.  Complain and fall back to the +	 * compile-time values if these limits are exceeded. +	 */ +	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || +	    rcu_fanout_leaf > sizeof(unsigned long) * 8 || +	    n > rcu_capacity[MAX_RCU_LVLS]) { +		WARN_ON(1); +		return; +	} + +	/* Calculate the number of rcu_nodes at each level of the tree. */ +	for (i = 1; i <= MAX_RCU_LVLS; i++) +		if (n <= rcu_capacity[i]) { +			for (j = 0; j <= i; j++) +				num_rcu_lvl[j] = +					DIV_ROUND_UP(n, rcu_capacity[i - j]); +			rcu_num_lvls = i; +			for (j = i + 1; j <= MAX_RCU_LVLS; j++) +				num_rcu_lvl[j] = 0; +			break; +		} + +	/* Calculate the total number of rcu_node structures. 
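*/

/*
 * A standalone check of the geometry arithmetic above: capacity with i
 * levels is rcu_fanout_leaf * CONFIG_RCU_FANOUT^(i-1), and once a depth
 * fits, level j needs DIV_ROUND_UP(n, capacity_below_it) nodes.  The
 * config values here are assumed (fanout 64, leaf fanout 16, 4096 CPUs).
 */
#include <stdio.h>

#define MAX_RCU_LVLS 4
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int fanout = 64, fanout_leaf = 16, n = 4096;
	int cap[MAX_RCU_LVLS + 1] = { 1, fanout_leaf };
	int lvl[MAX_RCU_LVLS + 1] = { 0 };

	for (int i = 2; i <= MAX_RCU_LVLS; i++)
		cap[i] = cap[i - 1] * fanout;	/* capacity of an i-level tree */

	for (int i = 1; i <= MAX_RCU_LVLS; i++) {
		if (n > cap[i])
			continue;	/* too many CPUs for i levels */
		for (int j = 0; j <= i; j++)
			lvl[j] = DIV_ROUND_UP(n, cap[i - j]);
		printf("levels=%d:", i);	/* levels=3: 1 4 256 4096 */
		for (int j = 0; j <= i; j++)
			printf(" %d", lvl[j]);
		printf("\n");
		break;
	}
	return 0;
}

/* the total sums every level, then subtracts the CPUs themselves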
*/ +	rcu_num_nodes = 0; +	for (i = 0; i <= MAX_RCU_LVLS; i++) +		rcu_num_nodes += num_rcu_lvl[i]; +	rcu_num_nodes -= n;  }  void __init rcu_init(void) @@ -2663,6 +2802,7 @@ void __init rcu_init(void)  	int cpu;  	rcu_bootup_announce(); +	rcu_init_geometry();  	rcu_init_one(&rcu_sched_state, &rcu_sched_data);  	rcu_init_one(&rcu_bh_state, &rcu_bh_data);  	__rcu_init_preempt(); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079..4d29169f212 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -42,28 +42,28 @@  #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)  #if NR_CPUS <= RCU_FANOUT_1 -#  define NUM_RCU_LVLS	      1 +#  define RCU_NUM_LVLS	      1  #  define NUM_RCU_LVL_0	      1  #  define NUM_RCU_LVL_1	      (NR_CPUS)  #  define NUM_RCU_LVL_2	      0  #  define NUM_RCU_LVL_3	      0  #  define NUM_RCU_LVL_4	      0  #elif NR_CPUS <= RCU_FANOUT_2 -#  define NUM_RCU_LVLS	      2 +#  define RCU_NUM_LVLS	      2  #  define NUM_RCU_LVL_0	      1  #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)  #  define NUM_RCU_LVL_2	      (NR_CPUS)  #  define NUM_RCU_LVL_3	      0  #  define NUM_RCU_LVL_4	      0  #elif NR_CPUS <= RCU_FANOUT_3 -#  define NUM_RCU_LVLS	      3 +#  define RCU_NUM_LVLS	      3  #  define NUM_RCU_LVL_0	      1  #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)  #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)  #  define NUM_RCU_LVL_3	      (NR_CPUS)  #  define NUM_RCU_LVL_4	      0  #elif NR_CPUS <= RCU_FANOUT_4 -#  define NUM_RCU_LVLS	      4 +#  define RCU_NUM_LVLS	      4  #  define NUM_RCU_LVL_0	      1  #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)  #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) @@ -76,6 +76,9 @@  #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)  #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) +extern int rcu_num_lvls; +extern int rcu_num_nodes; +  /*   * Dynticks per-CPU state.   */ @@ -97,6 +100,7 @@ struct rcu_dynticks {  				    /* # times non-lazy CBs posted to CPU. */  	unsigned long nonlazy_posted_snap;  				    /* idle-period nonlazy_posted snapshot. */ +	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */  #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */  }; @@ -206,7 +210,7 @@ struct rcu_node {   */  #define rcu_for_each_node_breadth_first(rsp, rnp) \  	for ((rnp) = &(rsp)->node[0]; \ -	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) +	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)  /*   * Do a breadth-first scan of the non-leaf rcu_node structures for the @@ -215,7 +219,7 @@ struct rcu_node {   */  #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \  	for ((rnp) = &(rsp)->node[0]; \ -	     (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) +	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)  /*   * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -224,8 +228,8 @@ struct rcu_node {   * It is still a leaf node, even if it is also the root node.   */  #define rcu_for_each_leaf_node(rsp, rnp) \ -	for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ -	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) +	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ +	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)  /* Index values for nxttail array in struct rcu_data. */  #define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */ @@ -311,6 +315,9 @@ struct rcu_data {  	unsigned long n_rp_need_fqs;  	unsigned long n_rp_need_nothing; +	/* 6) _rcu_barrier() callback. 
*/ +	struct rcu_head barrier_head; +  	int cpu;  	struct rcu_state *rsp;  }; @@ -357,10 +364,12 @@ do {									\   */  struct rcu_state {  	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */ -	struct rcu_node *level[NUM_RCU_LVLS];	/* Hierarchy levels. */ +	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */  	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */ -	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */ +	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */  	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */ +	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */ +		     void (*func)(struct rcu_head *head));  	/* The following fields are guarded by the root rcu_node's lock. */ @@ -392,6 +401,11 @@ struct rcu_state {  	struct task_struct *rcu_barrier_in_progress;  						/* Task doing rcu_barrier(), */  						/*  or NULL if no barrier. */ +	struct mutex barrier_mutex;		/* Guards barrier fields. */ +	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */ +	struct completion barrier_completion;	/* Wake at barrier end. */ +	unsigned long n_barrier_done;		/* ++ at start and end of */ +						/*  _rcu_barrier(). */  	raw_spinlock_t fqslock;			/* Only one task forcing */  						/*  quiescent states. */  	unsigned long jiffies_force_qs;		/* Time at which to invoke */ @@ -409,8 +423,13 @@ struct rcu_state {  	unsigned long gp_max;			/* Maximum GP duration in */  						/*  jiffies. */  	char *name;				/* Name of structure. */ +	struct list_head flavors;		/* List of RCU flavors. */  }; +extern struct list_head rcu_struct_flavors; +#define for_each_rcu_flavor(rsp) \ +	list_for_each_entry((rsp), &rcu_struct_flavors, flavors) +  /* Return values for rcu_preempt_offline_tasks(). */  #define RCU_OFL_TASKS_NORM_GP	0x1		/* Tasks blocking normal */ @@ -453,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu);  #endif /* #ifdef CONFIG_HOTPLUG_CPU */  static void rcu_print_detail_task_stall(struct rcu_state *rsp);  static int rcu_print_task_stall(struct rcu_node *rnp); -static void rcu_preempt_stall_reset(void);  static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);  #ifdef CONFIG_HOTPLUG_CPU  static int rcu_preempt_offline_tasks(struct rcu_state *rsp,  				     struct rcu_node *rnp,  				     struct rcu_data *rdp);  #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_preempt_cleanup_dead_cpu(int cpu);  static void rcu_preempt_check_callbacks(int cpu); -static void rcu_preempt_process_callbacks(void);  void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));  #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)  static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,  			       bool wake);  #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ -static int rcu_preempt_pending(int cpu); -static int rcu_preempt_cpu_has_callbacks(int cpu); -static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_cleanup_dying_cpu(void);  static void __init __rcu_init_preempt(void);  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3..7f3244c0df0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void)  	printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");  #endif  #if 
NUM_RCU_LVL_4 != 0 -	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); +	printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");  #endif +	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) +		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); +	if (nr_cpu_ids != NR_CPUS) +		printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);  }  #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); +struct rcu_state rcu_preempt_state = +	RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);  DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);  static struct rcu_state *rcu_state = &rcu_preempt_state; -static void rcu_read_unlock_special(struct task_struct *t);  static int rcu_preempted_readers_exp(struct rcu_node *rnp);  /* @@ -233,18 +237,6 @@ static void rcu_preempt_note_context_switch(int cpu)  }  /* - * Tree-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ -	current->rcu_read_lock_nesting++; -	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -/*   * Check for preempted RCU readers blocking the current grace period   * for the specified rcu_node structure.  If the caller needs a reliable   * answer, it must hold the rcu_node's ->lock. @@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,   * notify RCU core processing or task having blocked during the RCU   * read-side critical section.   */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t)  {  	int empty;  	int empty_exp; @@ -398,8 +390,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)  							 rnp->grphi,  							 !!rnp->gp_tasks);  			rcu_report_unblock_qs_rnp(rnp, flags); -		} else +		} else {  			raw_spin_unlock_irqrestore(&rnp->lock, flags); +		}  #ifdef CONFIG_RCU_BOOST  		/* Unboost if we were boosted. */ @@ -418,38 +411,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)  	}  } -/* - * Tree-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ -	struct task_struct *t = current; - -	if (t->rcu_read_lock_nesting != 1) -		--t->rcu_read_lock_nesting; -	else { -		barrier();  /* critical section before exit code. 
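*/

/*
 * This hunk removes __rcu_read_lock()/__rcu_read_unlock() from
 * rcutree_plugin.h; in this series they appear to move to common code
 * rather than disappear.  The protocol being relocated: the outermost
 * unlock parks ->rcu_read_lock_nesting at INT_MIN so that an interrupt
 * sampling the counter sees "unlock in progress" rather than zero.
 * A compilable, single-threaded model:
 */
#include <limits.h>
#include <stdio.h>

static int nesting;	/* models t->rcu_read_lock_nesting */

static void demo_read_unlock(void)
{
	if (nesting != 1) {
		--nesting;		/* inner unlock: plain decrement */
	} else {
		nesting = INT_MIN;	/* flag "outermost unlock running" */
		/* unlock-special work runs here; anything that looks at
		 * the counter now sees a large negative value */
		nesting = 0;
	}
}

int main(void)
{
	nesting = 2;	/* two nested rcu_read_lock()s */
	demo_read_unlock();
	demo_read_unlock();
	printf("nesting=%d\n", nesting);	/* 0 */
	return 0;
}

/* the removed outermost-unlock sequence continues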
*/ -		t->rcu_read_lock_nesting = INT_MIN; -		barrier();  /* assign before ->rcu_read_unlock_special load */ -		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) -			rcu_read_unlock_special(t); -		barrier();  /* ->rcu_read_unlock_special load before assign */ -		t->rcu_read_lock_nesting = 0; -	} -#ifdef CONFIG_PROVE_LOCKING -	{ -		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - -		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); -	} -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); -  #ifdef CONFIG_RCU_CPU_STALL_VERBOSE  /* @@ -540,16 +501,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)  }  /* - * Suppress preemptible RCU's CPU stall warnings by pushing the - * time of the next stall-warning message comfortably far into the - * future. - */ -static void rcu_preempt_stall_reset(void) -{ -	rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; -} - -/*   * Check that the list of blocked tasks for the newly completed grace   * period is in fact empty.  It is a serious bug to complete a grace   * period that still has RCU readers blocked!  This function must be @@ -650,14 +601,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,  #endif /* #ifdef CONFIG_HOTPLUG_CPU */  /* - * Do CPU-offline processing for preemptible RCU. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -	rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); -} - -/*   * Check for a quiescent state from the current CPU.  When a task blocks,   * the task is recorded in the corresponding CPU's rcu_node structure,   * which is checked elsewhere. @@ -677,15 +620,6 @@ static void rcu_preempt_check_callbacks(int cpu)  		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;  } -/* - * Process callbacks for preemptible RCU. - */ -static void rcu_preempt_process_callbacks(void) -{ -	__rcu_process_callbacks(&rcu_preempt_state, -				&__get_cpu_var(rcu_preempt_data)); -} -  #ifdef CONFIG_RCU_BOOST  static void rcu_preempt_do_callbacks(void) @@ -824,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)  	int must_wait = 0;  	raw_spin_lock_irqsave(&rnp->lock, flags); -	if (list_empty(&rnp->blkd_tasks)) +	if (list_empty(&rnp->blkd_tasks)) {  		raw_spin_unlock_irqrestore(&rnp->lock, flags); -	else { +	} else {  		rnp->exp_tasks = rnp->blkd_tasks.next;  		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */  		must_wait = 1; @@ -870,9 +804,9 @@ void synchronize_rcu_expedited(void)  	 * expedited grace period for us, just leave.  	 */  	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { -		if (trycount++ < 10) +		if (trycount++ < 10) {  			udelay(trycount * num_online_cpus()); -		else { +		} else {  			synchronize_rcu();  			return;  		} @@ -917,51 +851,16 @@ mb_ret:  }  EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -/* - * Check to see if there is any immediate preemptible-RCU-related work - * to be done. - */ -static int rcu_preempt_pending(int cpu) -{ -	return __rcu_pending(&rcu_preempt_state, -			     &per_cpu(rcu_preempt_data, cpu)); -} - -/* - * Does preemptible RCU have callbacks on this CPU? - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ -	return !!per_cpu(rcu_preempt_data, cpu).nxtlist; -} -  /**   * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.   */  void rcu_barrier(void)  { -	_rcu_barrier(&rcu_preempt_state, call_rcu); +	_rcu_barrier(&rcu_preempt_state);  }  EXPORT_SYMBOL_GPL(rcu_barrier);  /* - * Initialize preemptible RCU's per-CPU data. 
- */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); -} - -/* - * Move preemptible RCU's callbacks from dying CPU to other online CPU - * and record a quiescent state. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -	rcu_cleanup_dying_cpu(&rcu_preempt_state); -} - -/*   * Initialize preemptible RCU's state structures.   */  static void __init __rcu_init_preempt(void) @@ -1046,14 +945,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)  }  /* - * Because preemptible RCU does not exist, there is no need to suppress - * its CPU stall warnings. - */ -static void rcu_preempt_stall_reset(void) -{ -} - -/*   * Because there is no preemptible RCU, there can be no readers blocked,   * so there is no need to check for blocked tasks.  So check only for   * bogus qsmask values. @@ -1081,14 +972,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,  #endif /* #ifdef CONFIG_HOTPLUG_CPU */  /* - * Because preemptible RCU does not exist, it never needs CPU-offline - * processing. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -} - -/*   * Because preemptible RCU does not exist, it never has any callbacks   * to check.   */ @@ -1097,14 +980,6 @@ static void rcu_preempt_check_callbacks(int cpu)  }  /* - * Because preemptible RCU does not exist, it never has any callbacks - * to process. - */ -static void rcu_preempt_process_callbacks(void) -{ -} - -/*   * Queue an RCU callback for lazy invocation after a grace period.   * This will likely be later named something like "call_rcu_lazy()",   * but this change will require some way of tagging the lazy RCU @@ -1145,22 +1020,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,  #endif /* #ifdef CONFIG_HOTPLUG_CPU */  /* - * Because preemptible RCU does not exist, it never has any work to do. - */ -static int rcu_preempt_pending(int cpu) -{ -	return 0; -} - -/* - * Because preemptible RCU does not exist, it never has callbacks - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ -	return 0; -} - -/*   * Because preemptible RCU does not exist, rcu_barrier() is just   * another name for rcu_barrier_sched().   */ @@ -1171,21 +1030,6 @@ void rcu_barrier(void)  EXPORT_SYMBOL_GPL(rcu_barrier);  /* - * Because preemptible RCU does not exist, there is no per-CPU - * data to initialize. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -} - -/* - * Because there is no preemptible RCU, there is no cleanup to do. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -} - -/*   * Because preemptible RCU does not exist, it need not be initialized.   */  static void __init __rcu_init_preempt(void) @@ -1968,9 +1812,11 @@ static void rcu_idle_count_callbacks_posted(void)   */  #define RCU_IDLE_FLUSHES 5		/* Number of dyntick-idle tries. */  #define RCU_IDLE_OPT_FLUSHES 3		/* Optional dyntick-idle tries. */ -#define RCU_IDLE_GP_DELAY 6		/* Roughly one grace period. */ +#define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */  #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */ +extern int tick_nohz_enabled; +  /*   * Does the specified flavor of RCU have non-lazy callbacks pending on   * the specified CPU?  Both RCU flavor and CPU are specified by the @@ -2047,10 +1893,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)  		return 1;  	}  	/* Set up for the possibility that RCU will post a timer. 
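*/

/*
 * The change below rounds idle-timer expiries to multiples of the delay
 * (round_up() for the 4-jiffy non-lazy case, round_jiffies() for the
 * six-second lazy case), so CPUs whose expiries fall in the same window
 * wake together instead of one tick apart.  The arithmetic, with
 * round_up() reduced to its power-of-two form and assumed numbers:
 */
#include <stdio.h>

#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)	/* y: power of 2 */

int main(void)
{
	unsigned long jiffies = 1000003;	/* arbitrary "now" */
	unsigned long delay = 4;		/* RCU_IDLE_GP_DELAY */
	unsigned long expires = round_up(jiffies + delay, delay);

	printf("now=%lu expires=%lu (delta=%lu)\n",
	       jiffies, expires, expires - jiffies);	/* delta=5, 4-aligned */
	return 0;
}

/* raw offsets out, rounded expiries in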
*/ -	if (rcu_cpu_has_nonlazy_callbacks(cpu)) -		*delta_jiffies = RCU_IDLE_GP_DELAY; -	else -		*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; +	if (rcu_cpu_has_nonlazy_callbacks(cpu)) { +		*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, +					  RCU_IDLE_GP_DELAY) - jiffies; +	} else { +		*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; +		*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; +	}  	return 0;  } @@ -2109,6 +1958,7 @@ static void rcu_cleanup_after_idle(int cpu)  	del_timer(&rdtp->idle_gp_timer);  	trace_rcu_prep_idle("Cleanup after idle"); +	rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);  }  /* @@ -2134,6 +1984,18 @@ static void rcu_prepare_for_idle(int cpu)  {  	struct timer_list *tp;  	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); +	int tne; + +	/* Handle nohz enablement switches conservatively. */ +	tne = ACCESS_ONCE(tick_nohz_enabled); +	if (tne != rdtp->tick_nohz_enabled_snap) { +		if (rcu_cpu_has_callbacks(cpu)) +			invoke_rcu_core(); /* force nohz to see update. */ +		rdtp->tick_nohz_enabled_snap = tne; +		return; +	} +	if (!tne) +		return;  	/*  	 * If this is an idle re-entry, for example, due to use of @@ -2187,10 +2049,11 @@ static void rcu_prepare_for_idle(int cpu)  		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {  			trace_rcu_prep_idle("Dyntick with callbacks");  			rdtp->idle_gp_timer_expires = -					   jiffies + RCU_IDLE_GP_DELAY; +				round_up(jiffies + RCU_IDLE_GP_DELAY, +					 RCU_IDLE_GP_DELAY);  		} else {  			rdtp->idle_gp_timer_expires = -					   jiffies + RCU_IDLE_LAZY_GP_DELAY; +				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);  			trace_rcu_prep_idle("Dyntick with lazy callbacks");  		}  		tp = &rdtp->idle_gp_timer; @@ -2231,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu)  	if (rcu_cpu_has_callbacks(cpu)) {  		trace_rcu_prep_idle("More callbacks");  		invoke_rcu_core(); -	} else +	} else {  		trace_rcu_prep_idle("Callbacks drained"); +	}  }  /* @@ -2269,6 +2133,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)  static void print_cpu_stall_fast_no_hz(char *cp, int cpu)  { +	*cp = '\0';  }  #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d4bc16ddd1d..abffb486e94 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,6 +46,31 @@  #define RCU_TREE_NONCORE  #include "rcutree.h" +static int show_rcubarrier(struct seq_file *m, void *unused) +{ +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", +			   rsp->name, rsp->rcu_barrier_in_progress ? 
'B' : '.', +			   atomic_read(&rsp->barrier_cpu_count), +			   rsp->n_barrier_done); +	return 0; +} + +static int rcubarrier_open(struct inode *inode, struct file *file) +{ +	return single_open(file, show_rcubarrier, NULL); +} + +static const struct file_operations rcubarrier_fops = { +	.owner = THIS_MODULE, +	.open = rcubarrier_open, +	.read = seq_read, +	.llseek = seq_lseek, +	.release = single_release, +}; +  #ifdef CONFIG_RCU_BOOST  static char convert_kthread_status(unsigned int kthread_status) @@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)  		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);  } -#define PRINT_RCU_DATA(name, func, m) \ -	do { \ -		int _p_r_d_i; \ -		\ -		for_each_possible_cpu(_p_r_d_i) \ -			func(m, &per_cpu(name, _p_r_d_i)); \ -	} while (0) -  static int show_rcudata(struct seq_file *m, void *unused)  { -#ifdef CONFIG_TREE_PREEMPT_RCU -	seq_puts(m, "rcu_preempt:\n"); -	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -	seq_puts(m, "rcu_sched:\n"); -	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); -	seq_puts(m, "rcu_bh:\n"); -	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); +	int cpu; +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) { +		seq_printf(m, "%s:\n", rsp->name); +		for_each_possible_cpu(cpu) +			print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu)); +	}  	return 0;  } @@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)  static int show_rcudata_csv(struct seq_file *m, void *unused)  { +	int cpu; +	struct rcu_state *rsp; +  	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");  	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");  	seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); @@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)  	seq_puts(m, "\"kt\",\"ktl\"");  #endif /* #ifdef CONFIG_RCU_BOOST */  	seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); -#ifdef CONFIG_TREE_PREEMPT_RCU -	seq_puts(m, "\"rcu_preempt:\"\n"); -	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -	seq_puts(m, "\"rcu_sched:\"\n"); -	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); -	seq_puts(m, "\"rcu_bh:\"\n"); -	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); +	for_each_rcu_flavor(rsp) { +		seq_printf(m, "\"%s:\"\n", rsp->name); +		for_each_possible_cpu(cpu) +			print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu)); +	}  	return 0;  } @@ -201,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = {  static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)  { -	seq_printf(m,  "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu " -		   "j=%04x bt=%04x\n", +	seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",  		   rnp->grplo, rnp->grphi,  		   "T."[list_empty(&rnp->blkd_tasks)],  		   "N."[!rnp->gp_tasks], @@ -210,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)  		   "B."[!rnp->boost_tasks],  		   convert_kthread_status(rnp->boost_kthread_status),  		   rnp->n_tasks_boosted, rnp->n_exp_boosts, -		   rnp->n_normal_boosts, +		   rnp->n_normal_boosts); +	seq_printf(m, "j=%04x bt=%04x\n",  		   (int)(jiffies & 0xffff),  		   (int)(rnp->boost_time & 0xffff)); -	seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", -		   "     balk", +	seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu 
ny=%lu nos=%lu\n",  		   rnp->n_balk_blkd_tasks,  		   rnp->n_balk_exp_gp_tasks,  		   rnp->n_balk_boost_tasks, @@ -270,15 +286,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)  	struct rcu_node *rnp;  	gpnum = rsp->gpnum; -	seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " -		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", -		   rsp->completed, gpnum, rsp->fqs_state, +	seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ", +		   rsp->name, rsp->completed, gpnum, rsp->fqs_state,  		   (long)(rsp->jiffies_force_qs - jiffies), -		   (int)(jiffies & 0xffff), +		   (int)(jiffies & 0xffff)); +	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",  		   rsp->n_force_qs, rsp->n_force_qs_ngp,  		   rsp->n_force_qs - rsp->n_force_qs_ngp,  		   rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); -	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { +	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {  		if (rnp->level != level) {  			seq_puts(m, "\n");  			level = rnp->level; @@ -295,14 +311,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)  static int show_rcuhier(struct seq_file *m, void *unused)  { -#ifdef CONFIG_TREE_PREEMPT_RCU -	seq_puts(m, "rcu_preempt:\n"); -	print_one_rcu_state(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -	seq_puts(m, "rcu_sched:\n"); -	print_one_rcu_state(m, &rcu_sched_state); -	seq_puts(m, "rcu_bh:\n"); -	print_one_rcu_state(m, &rcu_bh_state); +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		print_one_rcu_state(m, rsp);  	return 0;  } @@ -343,11 +355,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)  static int show_rcugp(struct seq_file *m, void *unused)  { -#ifdef CONFIG_TREE_PREEMPT_RCU -	show_one_rcugp(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -	show_one_rcugp(m, &rcu_sched_state); -	show_one_rcugp(m, &rcu_bh_state); +	struct rcu_state *rsp; + +	for_each_rcu_flavor(rsp) +		show_one_rcugp(m, rsp);  	return 0;  } @@ -366,44 +377,36 @@ static const struct file_operations rcugp_fops = {  static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)  { -	seq_printf(m, "%3d%cnp=%ld " -		   "qsp=%ld rpq=%ld cbr=%ld cng=%ld " -		   "gpc=%ld gps=%ld nf=%ld nn=%ld\n", +	seq_printf(m, "%3d%cnp=%ld ",  		   rdp->cpu,  		   cpu_is_offline(rdp->cpu) ? '!' 
: ' ', -		   rdp->n_rcu_pending, +		   rdp->n_rcu_pending); +	seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",  		   rdp->n_rp_qs_pending,  		   rdp->n_rp_report_qs,  		   rdp->n_rp_cb_ready, -		   rdp->n_rp_cpu_needs_gp, +		   rdp->n_rp_cpu_needs_gp); +	seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n",  		   rdp->n_rp_gp_completed,  		   rdp->n_rp_gp_started,  		   rdp->n_rp_need_fqs,  		   rdp->n_rp_need_nothing);  } -static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +static int show_rcu_pending(struct seq_file *m, void *unused)  {  	int cpu;  	struct rcu_data *rdp; +	struct rcu_state *rsp; -	for_each_possible_cpu(cpu) { -		rdp = per_cpu_ptr(rsp->rda, cpu); -		if (rdp->beenonline) -			print_one_rcu_pending(m, rdp); +	for_each_rcu_flavor(rsp) { +		seq_printf(m, "%s:\n", rsp->name); +		for_each_possible_cpu(cpu) { +			rdp = per_cpu_ptr(rsp->rda, cpu); +			if (rdp->beenonline) +				print_one_rcu_pending(m, rdp); +		}  	} -} - -static int show_rcu_pending(struct seq_file *m, void *unused) -{ -#ifdef CONFIG_TREE_PREEMPT_RCU -	seq_puts(m, "rcu_preempt:\n"); -	print_rcu_pendings(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -	seq_puts(m, "rcu_sched:\n"); -	print_rcu_pendings(m, &rcu_sched_state); -	seq_puts(m, "rcu_bh:\n"); -	print_rcu_pendings(m, &rcu_bh_state);  	return 0;  } @@ -453,6 +456,11 @@ static int __init rcutree_trace_init(void)  	if (!rcudir)  		goto free_out; +	retval = debugfs_create_file("rcubarrier", 0444, rcudir, +						NULL, &rcubarrier_fops); +	if (!retval) +		goto free_out; +  	retval = debugfs_create_file("rcudata", 0444, rcudir,  						NULL, &rcudata_fops);  	if (!retval) diff --git a/kernel/smp.c b/kernel/smp.c index d0ae5b24875..29dd40a9f2f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)  	return 0;  }  EXPORT_SYMBOL(smp_call_function); - -void ipi_call_lock(void) -{ -	raw_spin_lock(&call_function.lock); -} - -void ipi_call_unlock(void) -{ -	raw_spin_unlock(&call_function.lock); -} - -void ipi_call_lock_irq(void) -{ -	raw_spin_lock_irq(&call_function.lock); -} - -void ipi_call_unlock_irq(void) -{ -	raw_spin_unlock_irq(&call_function.lock); -}  #endif /* USE_GENERIC_SMP_HELPERS */  /* Setup configured maximum number of CPUs to activate */ diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 80c0acfb847..6ef9433e1c7 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h @@ -3,8 +3,6 @@  struct task_struct; -int smpboot_prepare(unsigned int cpu); -  #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD  struct task_struct *idle_thread_get(unsigned int cpu);  void idle_thread_set_boot_cpu(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 41be02250e0..024540f97f7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)  /*   * NO HZ enabled ?   
*/ -static int tick_nohz_enabled __read_mostly  = 1; +int tick_nohz_enabled __read_mostly  = 1;  /*   * Enable / Disable tickless mode diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8f2aba1246f..cf364db5589 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -745,6 +745,7 @@ static void timekeeping_resume(void)  	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);  	timekeeper.ntp_error = 0;  	timekeeping_suspended = 0; +	timekeeping_update(&timekeeper, false);  	write_sequnlock_irqrestore(&timekeeper.lock, flags);  	touch_softlockup_watchdog(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a008663d86c..b4f20fba09f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,  static int __register_ftrace_function(struct ftrace_ops *ops)  { -	if (ftrace_disabled) +	if (unlikely(ftrace_disabled))  		return -ENODEV;  	if (FTRACE_WARN_ON(ops == &global_ops)) @@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops)  	mutex_lock(&ftrace_lock); -	if (unlikely(ftrace_disabled)) -		goto out_unlock; -  	ret = __register_ftrace_function(ops);  	if (!ret)  		ret = ftrace_startup(ops, 0); - - out_unlock:  	mutex_unlock(&ftrace_lock); +  	return ret;  }  EXPORT_SYMBOL_GPL(register_ftrace_function); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f765465bffe..49491fa7daa 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)  	if (cpu_buffer->commit_page == cpu_buffer->reader_page)  		goto out; +	/* Don't bother swapping if the ring buffer is empty */ +	if (rb_num_of_entries(cpu_buffer) == 0) +		goto out; +  	/*  	 * Reset the reader page to size zero.  	 
*/ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fa0702be1..a120f98c411 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -830,6 +830,8 @@ int register_tracer(struct tracer *type)  		current_trace = saved_tracer;  		if (ret) {  			printk(KERN_CONT "FAILED!\n"); +			/* Add the warning after printing 'FAILED' */ +			WARN_ON(1);  			goto out;  		}  		/* Only reset on passing, to avoid touching corrupted buffers */ @@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk);  static void trace_iterator_increment(struct trace_iterator *iter)  { +	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); +  	iter->idx++; -	if (iter->buffer_iter[iter->cpu]) -		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); +	if (buf_iter) +		ring_buffer_read(buf_iter, NULL);  }  static struct trace_entry * @@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,  		unsigned long *lost_events)  {  	struct ring_buffer_event *event; -	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; +	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);  	if (buf_iter)  		event = ring_buffer_iter_peek(buf_iter, ts); @@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)  	tr->data[cpu]->skipped_entries = 0; -	if (!iter->buffer_iter[cpu]) +	buf_iter = trace_buffer_iter(iter, cpu); +	if (!buf_iter)  		return; -	buf_iter = iter->buffer_iter[cpu];  	ring_buffer_iter_reset(buf_iter);  	/* @@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)  int trace_empty(struct trace_iterator *iter)  { +	struct ring_buffer_iter *buf_iter;  	int cpu;  	/* If we are looking at one CPU buffer, only check that one */  	if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {  		cpu = iter->cpu_file; -		if (iter->buffer_iter[cpu]) { -			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) +		buf_iter = trace_buffer_iter(iter, cpu); +		if (buf_iter) { +			if (!ring_buffer_iter_empty(buf_iter))  				return 0;  		} else {  			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) @@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter)  	}  	for_each_tracing_cpu(cpu) { -		if (iter->buffer_iter[cpu]) { -			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) +		buf_iter = trace_buffer_iter(iter, cpu); +		if (buf_iter) { +			if (!ring_buffer_iter_empty(buf_iter))  				return 0;  		} else {  			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) @@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file)  	if (!iter)  		return ERR_PTR(-ENOMEM); +	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), +				    GFP_KERNEL); +	if (!iter->buffer_iter) +		goto release; +  	/*  	 * We make a copy of the current tracer to avoid concurrent  	 * changes on it while we are reading. 
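The trace.c hunk above moves iter->buffer_iter from a fixed per-iterator array to one allocated per possible CPU at open time, and the trace.h hunk further down routes every lookup through the NULL-safe trace_buffer_iter() accessor. A minimal userspace sketch of that pattern follows; the struct and function names here are hypothetical, not the kernel's:

#include <stdio.h>
#include <stdlib.h>

struct iter_state {
	void **cpu_iters;	/* one slot per possible CPU; slots may stay NULL */
	int ncpus;
};

/* NULL-safe lookup: tolerates both a missing array and an empty slot,
 * the same tolerance trace_buffer_iter() gives iter->buffer_iter.
 */
static void *iter_for_cpu(struct iter_state *it, int cpu)
{
	if (it->cpu_iters && cpu >= 0 && cpu < it->ncpus)
		return it->cpu_iters[cpu];
	return NULL;
}

static int iter_open(struct iter_state *it, int ncpus)
{
	it->ncpus = ncpus;
	it->cpu_iters = calloc(ncpus, sizeof(*it->cpu_iters));
	return it->cpu_iters ? 0 : -1;
}

int main(void)
{
	struct iter_state it = { 0 };

	if (iter_open(&it, 4))
		return 1;
	/* all consumers go through the accessor, never the raw array */
	printf("cpu 2 iter: %p\n", iter_for_cpu(&it, 2));
	free(it.cpu_iters);
	return 0;
}

The point of the accessor is that readers no longer care whether the array was ever allocated for this open file, which is what makes the error paths in the hunk below safe.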
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file)   fail:  	mutex_unlock(&trace_types_lock);  	kfree(iter->trace); +	kfree(iter->buffer_iter); +release:  	seq_release_private(inode, file);  	return ERR_PTR(-ENOMEM);  } @@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file)  	mutex_destroy(&iter->mutex);  	free_cpumask_var(iter->started);  	kfree(iter->trace); +	kfree(iter->buffer_iter);  	seq_release_private(inode, file);  	return 0;  } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5aec220d2de..55e1f7f0db1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -317,6 +317,14 @@ struct tracer {  #define TRACE_PIPE_ALL_CPU	-1 +static inline struct ring_buffer_iter * +trace_buffer_iter(struct trace_iterator *iter, int cpu) +{ +	if (iter->buffer_iter && iter->buffer_iter[cpu]) +		return iter->buffer_iter[cpu]; +	return NULL; +} +  int tracer_init(struct tracer *t, struct trace_array *tr);  int tracing_is_enabled(void);  void trace_wake_up(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a7d2a4c653d..ce27c8ba8d3 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,  		next = &data->ret;  	} else { -		ring_iter = iter->buffer_iter[iter->cpu]; +		ring_iter = trace_buffer_iter(iter, iter->cpu);  		/* First peek to compare current entry and the next one */  		if (ring_iter) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index df611a0e76c..123b189c732 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1325,4 +1325,4 @@ __init static int init_events(void)  	return 0;  } -device_initcall(init_events); +early_initcall(init_events); diff --git a/lib/list_debug.c b/lib/list_debug.c index 23a5e031cd8..c24c2f7e296 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -87,12 +87,10 @@ void __list_add_rcu(struct list_head *new,  		    struct list_head *prev, struct list_head *next)  {  	WARN(next->prev != prev, -		"list_add_rcu corruption. next->prev should be " -		"prev (%p), but was %p. (next=%p).\n", +		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",  		prev, next->prev, next);  	WARN(prev->next != next, -		"list_add_rcu corruption. prev->next should be " -		"next (%p), but was %p. (prev=%p).\n", +		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",  		next, prev->next, prev);  	new->next = next;  	new->prev = prev; diff --git a/mm/bootmem.c b/mm/bootmem.c index 73096630cb3..bcb63ac48cc 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -710,6 +710,10 @@ again:  	if (ptr)  		return ptr; +	/* do not panic in alloc_bootmem_bdata() */ +	if (limit && goal + size > limit) +		limit = 0; +  	ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);  	if (ptr)  		return ptr; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 44030096da6..4a4f9219683 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5635,7 +5635,12 @@ static struct page *  __alloc_contig_migrate_alloc(struct page *page, unsigned long private,  			     int **resultp)  { -	return alloc_page(GFP_HIGHUSER_MOVABLE); +	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + +	if (PageHighMem(page)) +		gfp_mask |= __GFP_HIGHMEM; + +	return alloc_page(gfp_mask);  }  /* [start, end) must belong to a single zone. 
*/ diff --git a/mm/vmscan.c b/mm/vmscan.c index 661576324c7..66e431060c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)  		 * them before going back to sleep.  		 */  		set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); -		schedule(); + +		if (!kthread_should_stop()) +			schedule(); +  		set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);  	} else {  		if (remaining) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6089f0cf23b..9096bcb0813 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -403,6 +403,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,  		break;  	case NETDEV_DOWN: +		if (dev->features & NETIF_F_HW_VLAN_FILTER) +			vlan_vid_del(dev, 0); +  		/* Put all VLANs for this dev in the down state too.  */  		for (i = 0; i < VLAN_N_VID; i++) {  			vlandev = vlan_group_get_device(grp, i); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 051f7abae66..779095ded68 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -842,6 +842,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,  		case AX25_P_NETROM:  			if (ax25_protocol_is_registered(AX25_P_NETROM))  				return -ESOCKTNOSUPPORT; +			break;  #endif  #ifdef CONFIG_ROSE_MODULE  		case AX25_P_ROSE: diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8bf97515a77..c5863f49913 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1351,6 +1351,7 @@ void bla_free(struct bat_priv *bat_priv)   * @bat_priv: the bat priv with all the soft interface information   * @skb: the frame to be checked   * @vid: the VLAN ID of the frame + * @is_bcast: the packet came in a broadcast packet type.   *   * bla_rx avoidance checks if:   *  * we have to race for a claim @@ -1361,7 +1362,8 @@ void bla_free(struct bat_priv *bat_priv)   * process the skb.   *   */ -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, +	   bool is_bcast)  {  	struct ethhdr *ethhdr;  	struct claim search_claim, *claim = NULL; @@ -1380,7 +1382,7 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid)  	if (unlikely(atomic_read(&bat_priv->bla_num_requests)))  		/* don't allow broadcasts while requests are in flight */ -		if (is_multicast_ether_addr(ethhdr->h_dest)) +		if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)  			goto handled;  	memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN); @@ -1406,8 +1408,13 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid)  	}  	/* if it is a broadcast ... */ -	if (is_multicast_ether_addr(ethhdr->h_dest)) { -		/* ... drop it. the responsible gateway is in charge. */ +	if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { +		/* ... drop it. the responsible gateway is in charge. +		 * +		 * We need to check is_bcast because with the gateway +		 * feature, broadcasts (like DHCP requests) may be sent +		 * using a unicast packet type. +		 */  		goto handled;  	} else {  		/* seems the client considers us as its best gateway. 
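The bla_rx() change above tightens when a frame is treated as a broadcast: the destination must be multicast and the carrying batman-adv packet type must itself be BAT_BCAST, since the gateway feature can ship DHCP requests with a multicast destination inside a unicast packet. A standalone condensation of that predicate, with a hypothetical function name:

#include <stdbool.h>
#include <stdio.h>

/* A frame is claimed (and dropped) by bridge loop avoidance as a
 * broadcast only when the destination is multicast AND the carrying
 * batman-adv packet type was broadcast; gateway-forwarded DHCP
 * requests arrive with a multicast destination inside a unicast
 * packet and must be allowed through.
 */
static bool bla_drops_as_bcast(bool dest_is_multicast, bool is_bcast)
{
	return dest_is_multicast && is_bcast;
}

int main(void)
{
	printf("multicast dst, bcast type:   drop=%d\n",
	       bla_drops_as_bcast(true, true));
	printf("multicast dst, unicast type: drop=%d\n",
	       bla_drops_as_bcast(true, false));	/* gateway DHCP case */
	return 0;
}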
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e39f93acc28..dc5227b398d 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,7 +23,8 @@  #define _NET_BATMAN_ADV_BLA_H_  #ifdef CONFIG_BATMAN_ADV_BLA -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid); +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, +	   bool is_bcast);  int bla_tx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid);  int bla_is_backbone_gw(struct sk_buff *skb,  		       struct orig_node *orig_node, int hdr_size); @@ -41,7 +42,7 @@ void bla_free(struct bat_priv *bat_priv);  #else /* ifdef CONFIG_BATMAN_ADV_BLA */  static inline int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, -			 short vid) +			 short vid, bool is_bcast)  {  	return 0;  } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6e2530b0204..a0ec0e4ada4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -256,7 +256,11 @@ void interface_rx(struct net_device *soft_iface,  	struct bat_priv *bat_priv = netdev_priv(soft_iface);  	struct ethhdr *ethhdr;  	struct vlan_ethhdr *vhdr; +	struct batman_header *batadv_header = (struct batman_header *)skb->data;  	short vid __maybe_unused = -1; +	bool is_bcast; + +	is_bcast = (batadv_header->packet_type == BAT_BCAST);  	/* check if enough space is available for pulling, and pull */  	if (!pskb_may_pull(skb, hdr_size)) @@ -302,7 +306,7 @@ void interface_rx(struct net_device *soft_iface,  	/* Let the bridge loop avoidance check the packet. If will  	 * not handle it, we can safely push it up.  	 */ -	if (bla_rx(bat_priv, skb, vid)) +	if (bla_rx(bat_priv, skb, vid, is_bcast))  		goto out;  	netif_rx(skb); diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 554b3128960..8c83c175b03 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -561,9 +561,9 @@ static int __init caif_device_init(void)  static void __exit caif_device_exit(void)  { -	unregister_pernet_subsys(&caif_net_ops);  	unregister_netdevice_notifier(&caif_device_notifier);  	dev_remove_pack(&caif_packet_type); +	unregister_pernet_subsys(&caif_net_ops);  }  module_init(caif_device_init); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b332c3d7605..10255e81be7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con)  		 * dropped messages.  		 */  		dout("process_connect got RESET peer seq %u\n", -		     le32_to_cpu(con->in_connect.connect_seq)); +		     le32_to_cpu(con->in_reply.connect_seq));  		pr_err("%s%lld %s connection reset\n",  		       ENTITY_NAME(con->peer_name),  		       ceph_pr_addr(&con->peer_addr.in_addr)); @@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con)  		 * If we sent a smaller connect_seq than the peer has, try  		 * again with a larger value.  		 
*/ -		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n", +		dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",  		     le32_to_cpu(con->out_connect.connect_seq), -		     le32_to_cpu(con->in_connect.connect_seq)); -		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq); +		     le32_to_cpu(con->in_reply.connect_seq)); +		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);  		ceph_con_out_kvec_reset(con);  		ret = prepare_write_connect(con);  		if (ret < 0) @@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con)  		 */  		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",  		     con->peer_global_seq, -		     le32_to_cpu(con->in_connect.global_seq)); +		     le32_to_cpu(con->in_reply.global_seq));  		get_global_seq(con->msgr, -			       le32_to_cpu(con->in_connect.global_seq)); +			       le32_to_cpu(con->in_reply.global_seq));  		ceph_con_out_kvec_reset(con);  		ret = prepare_write_connect(con);  		if (ret < 0) diff --git a/net/core/dev.c b/net/core/dev.c index 84f01ba81a3..1cb0d8a6aa6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb)  {  	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); -	if ((!skb->priority) && (skb->sk) && map) -		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; +	if (!skb->priority && skb->sk && map) { +		unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + +		if (prioidx < map->priomap_len) +			skb->priority = map->priomap[prioidx]; +	}  }  #else  #define skb_update_prio(skb) @@ -6279,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void)  /* Initialize per network namespace state */  static int __net_init netdev_init(struct net *net)  { -	INIT_LIST_HEAD(&net->dev_base_head); +	if (net != &init_net) +		INIT_LIST_HEAD(&net->dev_base_head);  	net->dev_name_head = netdev_create_hash();  	if (net->dev_name_head == NULL) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dddbacb8f28..42f1e1c7514 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);  LIST_HEAD(net_namespace_list);  EXPORT_SYMBOL_GPL(net_namespace_list); -struct net init_net; +struct net init_net = { +	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +};  EXPORT_SYMBOL(init_net);  #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */ diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5b8aa2fae48..b2e9caa1ad1 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio)  		return -ENOSPC;  	}  	set_bit(prioidx, prioidx_map); +	if (atomic_read(&max_prioidx) < prioidx) +		atomic_set(&max_prioidx, prioidx);  	spin_unlock_irqrestore(&prioidx_map_lock, flags); -	atomic_set(&max_prioidx, prioidx);  	*prio = prioidx;  	return 0;  } @@ -64,7 +65,7 @@ static void put_prioidx(u32 idx)  	spin_unlock_irqrestore(&prioidx_map_lock, flags);  } -static void extend_netdev_table(struct net_device *dev, u32 new_len) +static int extend_netdev_table(struct net_device *dev, u32 new_len)  {  	size_t new_size = sizeof(struct netprio_map) +  			   ((sizeof(u32) * new_len)); @@ -76,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)  	if (!new_priomap) {  		pr_warn("Unable to alloc new priomap!\n"); -		return; +		return -ENOMEM;  	}  	for (i = 0; @@ -89,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)  	
rcu_assign_pointer(dev->priomap, new_priomap);  	if (old_priomap)  		kfree_rcu(old_priomap, rcu); +	return 0;  } -static void update_netdev_tables(void) +static int write_update_netdev_table(struct net_device *dev)  { +	int ret = 0; +	u32 max_len; +	struct netprio_map *map; + +	rtnl_lock(); +	max_len = atomic_read(&max_prioidx) + 1; +	map = rtnl_dereference(dev->priomap); +	if (!map || map->priomap_len < max_len) +		ret = extend_netdev_table(dev, max_len); +	rtnl_unlock(); + +	return ret; +} + +static int update_netdev_tables(void) +{ +	int ret = 0;  	struct net_device *dev; -	u32 max_len = atomic_read(&max_prioidx) + 1; +	u32 max_len;  	struct netprio_map *map;  	rtnl_lock(); +	max_len = atomic_read(&max_prioidx) + 1;  	for_each_netdev(&init_net, dev) {  		map = rtnl_dereference(dev->priomap); -		if ((!map) || -		    (map->priomap_len < max_len)) -			extend_netdev_table(dev, max_len); +		/* +		 * don't allocate priomap if we didn't +		 * change net_prio.ifpriomap (map == NULL), +		 * this will speed up skb_update_prio. +		 */ +		if (map && map->priomap_len < max_len) { +			ret = extend_netdev_table(dev, max_len); +			if (ret < 0) +				break; +		}  	}  	rtnl_unlock(); +	return ret;  }  static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)  {  	struct cgroup_netprio_state *cs; -	int ret; +	int ret = -EINVAL;  	cs = kzalloc(sizeof(*cs), GFP_KERNEL);  	if (!cs)  		return ERR_PTR(-ENOMEM); -	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { -		kfree(cs); -		return ERR_PTR(-EINVAL); -	} +	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) +		goto out;  	ret = get_prioidx(&cs->prioidx); -	if (ret != 0) { +	if (ret < 0) {  		pr_warn("No space in priority index array\n"); -		kfree(cs); -		return ERR_PTR(ret); +		goto out; +	} + +	ret = update_netdev_tables(); +	if (ret < 0) { +		put_prioidx(cs->prioidx); +		goto out;  	}  	return &cs->css; +out: +	kfree(cs); +	return ERR_PTR(ret);  }  static void cgrp_destroy(struct cgroup *cgrp) @@ -141,7 +175,7 @@ static void cgrp_destroy(struct cgroup *cgrp)  	rtnl_lock();  	for_each_netdev(&init_net, dev) {  		map = rtnl_dereference(dev->priomap); -		if (map) +		if (map && cs->prioidx < map->priomap_len)  			map->priomap[cs->prioidx] = 0;  	}  	rtnl_unlock(); @@ -165,7 +199,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,  	rcu_read_lock();  	for_each_netdev_rcu(&init_net, dev) {  		map = rcu_dereference(dev->priomap); -		priority = map ? map->priomap[prioidx] : 0; +		priority = (map && prioidx < map->priomap_len) ? 
map->priomap[prioidx] : 0;  		cb->fill(cb, dev->name, priority);  	}  	rcu_read_unlock(); @@ -220,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,  	if (!dev)  		goto out_free_devname; -	update_netdev_tables(); -	ret = 0; +	ret = write_update_netdev_table(dev); +	if (ret < 0) +		goto out_put_dev; +  	rcu_read_lock();  	map = rcu_dereference(dev->priomap);  	if (map)  		map->priomap[prioidx] = priority;  	rcu_read_unlock(); + +out_put_dev:  	dev_put(dev);  out_free_devname: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46a3d23d259..d124306b81f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,  	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +  			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -	if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) { +	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {  		void *data = netdev_alloc_frag(fragsz);  		if (likely(data)) { diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 6fbb2ad7bb6..16705611589 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -230,6 +230,12 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,  	mtu = dev->mtu;  	pr_debug("name = %s, mtu = %u\n", dev->name, mtu); +	if (size > mtu) { +		pr_debug("size = %Zu, mtu = %u\n", size, mtu); +		err = -EINVAL; +		goto out_dev; +	} +  	hlen = LL_RESERVED_SPACE(dev);  	tlen = dev->needed_tailroom;  	skb = sock_alloc_send_skb(sk, hlen + tlen + size, @@ -258,12 +264,6 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,  	if (err < 0)  		goto out_skb; -	if (size > mtu) { -		pr_debug("size = %Zu, mtu = %u\n", size, mtu); -		err = -EINVAL; -		goto out_skb; -	} -  	skb->dev = dev;  	skb->sk  = sk;  	skb->protocol = htons(ETH_P_IEEE802154); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c48adc565e9..667c1d4ca98 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)  		case CIPSO_V4_TAG_LOCAL:  			/* This is a non-standard tag that we only allow for  			 * local connections, so if the incoming interface is -			 * not the loopback device drop the packet. */ -			if (!(skb->dev->flags & IFF_LOOPBACK)) { +			 * not the loopback device drop the packet. Further, +			 * there is no legitimate reason for setting this from +			 * userspace so reject it if skb is NULL. 
*/ +			if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) {  				err_offset = opt_iter;  				goto validate_return_locked;  			} diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a4bb856de08..0db5d34a06b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2174,15 +2174,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,  		       sdata->name, mgmt->sa, status_code);  		ieee80211_destroy_assoc_data(sdata, false);  	} else { -		printk(KERN_DEBUG "%s: associated\n", sdata->name); -  		if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) {  			/* oops -- internal error -- send timeout for now */ -			ieee80211_destroy_assoc_data(sdata, true); -			sta_info_destroy_addr(sdata, mgmt->bssid); +			ieee80211_destroy_assoc_data(sdata, false);  			cfg80211_put_bss(*bss);  			return RX_MGMT_CFG80211_ASSOC_TIMEOUT;  		} +		printk(KERN_DEBUG "%s: associated\n", sdata->name);  		/*  		 * destroy assoc_data afterwards, as otherwise an idle diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 2d1acc6c544..f9e51ef8dfa 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -809,7 +809,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)  			max_rates = sband->n_bitrates;  	} -	msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); +	msp = kzalloc(sizeof(*msp), gfp);  	if (!msp)  		return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d43e3c122f7..84444dda194 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1521,11 +1521,12 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,  {  	struct net_device *dev = ptr;  	struct net *net = dev_net(dev); +	struct netns_ipvs *ipvs = net_ipvs(net);  	struct ip_vs_service *svc;  	struct ip_vs_dest *dest;  	unsigned int idx; -	if (event != NETDEV_UNREGISTER) +	if (event != NETDEV_UNREGISTER || !ipvs)  		return NOTIFY_DONE;  	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);  	EnterFunction(2); @@ -1551,7 +1552,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,  		}  	} -	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { +	list_for_each_entry(dest, &ipvs->dest_trash, n_list) {  		__ip_vs_dev_reset(dest, dev);  	}  	mutex_unlock(&__ip_vs_mutex); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 035960ec5cb..c6f7db720d8 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -16,6 +16,7 @@  #include <linux/netfilter/x_tables.h>  #include <linux/netfilter/xt_set.h> +#include <linux/netfilter/ipset/ip_set_timeout.h>  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -310,7 +311,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)  		info->del_set.flags, 0, UINT_MAX);  	/* Normalize to fit into jiffies */ -	if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC) +	if (add_opt.timeout != IPSET_NO_TIMEOUT && +	    add_opt.timeout > UINT_MAX/MSEC_PER_SEC)  		add_opt.timeout = UINT_MAX/MSEC_PER_SEC;  	if (info->add_set.index != IPSET_INVALID_ID)  		ip_set_add(info->add_set.index, skb, par, &add_opt); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 17a707db40e..e06d458fc71 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -292,7 +292,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr,  	pr_debug("%p\n", sk); -	if (llcp_sock == NULL) +	if (llcp_sock == NULL || 
llcp_sock->dev == NULL)  		return -EBADFD;  	addr->sa_family = AF_NFC; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 2754f098d43..bebaa43484b 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -229,7 +229,7 @@ found_UDP_peer:  	return peer;  new_UDP_peer: -	_net("Rx UDP DGRAM from NEW peer %d", peer->debug_id); +	_net("Rx UDP DGRAM from NEW peer");  	read_unlock_bh(&rxrpc_peer_lock);  	_leave(" = -EBUSY [new]");  	return ERR_PTR(-EBUSY); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a2a95aabf9c..c412ad0d030 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -331,29 +331,22 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche  	return PSCHED_NS2TICKS(ticks);  } -static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)  {  	struct sk_buff_head *list = &sch->q;  	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; -	struct sk_buff *skb; - -	if (likely(skb_queue_len(list) < sch->limit)) { -		skb = skb_peek_tail(list); -		/* Optimize for add at tail */ -		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) -			return qdisc_enqueue_tail(nskb, sch); +	struct sk_buff *skb = skb_peek_tail(list); -		skb_queue_reverse_walk(list, skb) { -			if (tnext >= netem_skb_cb(skb)->time_to_send) -				break; -		} +	/* Optimize for add at tail */ +	if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) +		return __skb_queue_tail(list, nskb); -		__skb_queue_after(list, skb, nskb); -		sch->qstats.backlog += qdisc_pkt_len(nskb); -		return NET_XMIT_SUCCESS; +	skb_queue_reverse_walk(list, skb) { +		if (tnext >= netem_skb_cb(skb)->time_to_send) +			break;  	} -	return qdisc_reshape_fail(nskb, sch); +	__skb_queue_after(list, skb, nskb);  }  /* @@ -368,7 +361,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)  	/* We don't fill cb now as skb_unshare() may invalidate it */  	struct netem_skb_cb *cb;  	struct sk_buff *skb2; -	int ret;  	int count = 1;  	/* Random duplication */ @@ -419,6 +411,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)  		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);  	} +	if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) +		return qdisc_reshape_fail(skb, sch); + +	sch->qstats.backlog += qdisc_pkt_len(skb); +  	cb = netem_skb_cb(skb);  	if (q->gap == 0 ||		/* not doing reordering */  	    q->counter < q->gap - 1 ||	/* inside last reordering gap */ @@ -450,7 +447,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)  		cb->time_to_send = now + delay;  		++q->counter; -		ret = tfifo_enqueue(skb, sch); +		tfifo_enqueue(skb, sch);  	} else {  		/*  		 * Do re-ordering by putting one out of N packets at the front @@ -460,16 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)  		q->counter = 0;  		__skb_queue_head(&sch->q, skb); -		sch->qstats.backlog += qdisc_pkt_len(skb);  		sch->qstats.requeues++; -		ret = NET_XMIT_SUCCESS; -	} - -	if (ret != NET_XMIT_SUCCESS) { -		if (net_xmit_drop_count(ret)) { -			sch->qstats.drops++; -			return ret; -		}  	}  	return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 74305c883bd..30ea4674cab 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -570,6 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)  	sch->qstats.backlog = q->qdisc->qstats.backlog;  	opts = nla_nest_start(skb, TCA_OPTIONS); +	if (opts == NULL) +		goto 
nla_put_failure;  	if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))  		goto nla_put_failure;  	return nla_nest_end(skb, opts); diff --git a/net/sctp/input.c index 80564fe0302..8b9b6790a3d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)  	epb = &ep->base; -	if (hlist_unhashed(&epb->node)) -		return; -  	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);  	head = &sctp_ep_hashtable[epb->hashent];  	sctp_write_lock(&head->lock); -	__hlist_del(&epb->node); +	hlist_del_init(&epb->node);  	sctp_write_unlock(&head->lock);  } @@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc)  	head = &sctp_assoc_hashtable[epb->hashent];  	sctp_write_lock(&head->lock); -	__hlist_del(&epb->node); +	hlist_del_init(&epb->node);  	sctp_write_unlock(&head->lock);  } diff --git a/net/sctp/socket.c index b3b8a8d813e..31c7bfcd9b5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1231,8 +1231,14 @@ out_free:  	SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"  			  " kaddrs: %p err: %d\n",  			  asoc, kaddrs, err); -	if (asoc) +	if (asoc) { +		/* sctp_primitive_ASSOCIATE may have added this association +		 * to the hash table; try to unhash it just in case. It's a +		 * no-op if it wasn't hashed, so we're safe. +		 */ +		sctp_unhash_established(asoc);  		sctp_association_free(asoc); +	}  	return err;  } @@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,  	goto out_unlock;  out_free: -	if (new_asoc) +	if (new_asoc) { +		sctp_unhash_established(asoc);  		sctp_association_free(asoc); +	}  out_unlock:  	sctp_release_sock(sk); diff --git a/security/selinux/hooks.c index 372ec6502aa..ffd8900a38e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2717,7 +2717,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)  			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))  		return dentry_has_perm(cred, dentry, FILE__SETATTR); -	if (ia_valid & ATTR_SIZE) +	if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE))  		av |= FILE__OPEN;  	return dentry_has_perm(cred, dentry, av); diff --git a/security/selinux/include/classmap.h index b8c53723e09..df2de54a958 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -145,7 +145,9 @@ struct security_class_mapping secclass_map[] = {  	    "node_bind", "name_connect", NULL } },  	{ "memprotect", { "mmap_zero", NULL } },  	{ "peer", { "recv", NULL } }, -	{ "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, +	{ "capability2", +	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", +	    NULL } },  	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },  	{ "tun_socket",  	  { COMMON_SOCK_PERMS, NULL } }, diff --git a/tools/lib/traceevent/Makefile index 3d69aa9ff51..46c2f6b7b12 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -250,8 +250,12 @@ endef  all_objs := $(sort $(ALL_OBJS))  all_deps := $(all_objs:%.o=.%.d) +# let the .d file also depend on the source and header files  define check_deps -		$(CC) -M $(CFLAGS) $< > $@; +		@set -e; $(RM) $@; \ +		$(CC) -M $(CFLAGS) $< > $@.$$$$; \ +		sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ +		$(RM) $@.$$$$  endef  $(gui_deps): ks_version.h @@ -270,11 +274,13 @@ 
endif  tags:	force  	$(RM) tags -	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px +	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ +	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'  TAGS:	force  	$(RM) TAGS -	find . -name '*.[ch]' | xargs etags +	find . -name '*.[ch]' | xargs etags \ +	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'  define do_install  	$(print_install)				\ @@ -290,7 +296,7 @@ install_lib: all_cmd install_plugins install_python  install: install_lib  clean: -	$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES).*.d +	$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d  	$(RM) tags TAGS  endif # skip-makefile diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 554828219c3..5f34aa371b5 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -467,8 +467,10 @@ int pevent_register_function(struct pevent *pevent, char *func,  		item->mod = NULL;  	item->addr = addr; -	pevent->funclist = item; +	if (!item->func || (mod && !item->mod)) +		die("malloc func"); +	pevent->funclist = item;  	pevent->func_count++;  	return 0; @@ -511,12 +513,12 @@ struct printk_list {  static int printk_cmp(const void *a, const void *b)  { -	const struct func_map *fa = a; -	const struct func_map *fb = b; +	const struct printk_map *pa = a; +	const struct printk_map *pb = b; -	if (fa->addr < fb->addr) +	if (pa->addr < pb->addr)  		return -1; -	if (fa->addr > fb->addr) +	if (pa->addr > pb->addr)  		return 1;  	return 0; @@ -583,10 +585,13 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt,  	item = malloc_or_die(sizeof(*item));  	item->next = pevent->printklist; -	pevent->printklist = item;  	item->printk = strdup(fmt);  	item->addr = addr; +	if (!item->printk) +		die("malloc fmt"); + +	pevent->printklist = item;  	pevent->printk_count++;  	return 0; @@ -616,7 +621,9 @@ static struct event_format *alloc_event(void)  {  	struct event_format *event; -	event = malloc_or_die(sizeof(*event)); +	event = malloc(sizeof(*event)); +	if (!event) +		return NULL;  	memset(event, 0, sizeof(*event));  	return event; @@ -626,12 +633,8 @@ static void add_event(struct pevent *pevent, struct event_format *event)  {  	int i; -	if (!pevent->events) -		pevent->events = malloc_or_die(sizeof(event)); -	else -		pevent->events = -			realloc(pevent->events, sizeof(event) * -				(pevent->nr_events + 1)); +	pevent->events = realloc(pevent->events, sizeof(event) * +				 (pevent->nr_events + 1));  	if (!pevent->events)  		die("Can not allocate events"); @@ -697,6 +700,10 @@ static void free_arg(struct print_arg *arg)  		free_arg(arg->symbol.field);  		free_flag_sym(arg->symbol.symbols);  		break; +	case PRINT_HEX: +		free_arg(arg->hex.field); +		free_arg(arg->hex.size); +		break;  	case PRINT_TYPE:  		free(arg->typecast.type);  		free_arg(arg->typecast.item); @@ -775,6 +782,25 @@ int pevent_peek_char(void)  	return __peek_char();  } +static int extend_token(char **tok, char *buf, int size) +{ +	char *newtok = realloc(*tok, size); + +	if (!newtok) { +		free(*tok); +		*tok = NULL; +		return -1; +	} + +	if (!*tok) +		strcpy(newtok, buf); +	else +		strcat(newtok, buf); +	*tok = newtok; + +	return 0; +} +  static enum event_type force_token(const char *str, char **tok);  static enum event_type __read_token(char **tok) @@ -859,17 +885,10 @@ static enum event_type __read_token(char **tok)  		do {  			if (i == (BUFSIZ - 1)) {  				buf[i] = 0; -				if (*tok) { -					*tok = realloc(*tok, tok_size + BUFSIZ); -					if (!*tok) -		
				return EVENT_NONE; -					strcat(*tok, buf); -				} else -					*tok = strdup(buf); +				tok_size += BUFSIZ; -				if (!*tok) +				if (extend_token(tok, buf, tok_size) < 0)  					return EVENT_NONE; -				tok_size += BUFSIZ;  				i = 0;  			}  			last_ch = ch; @@ -908,17 +927,10 @@ static enum event_type __read_token(char **tok)  	while (get_type(__peek_char()) == type) {  		if (i == (BUFSIZ - 1)) {  			buf[i] = 0; -			if (*tok) { -				*tok = realloc(*tok, tok_size + BUFSIZ); -				if (!*tok) -					return EVENT_NONE; -				strcat(*tok, buf); -			} else -				*tok = strdup(buf); +			tok_size += BUFSIZ; -			if (!*tok) +			if (extend_token(tok, buf, tok_size) < 0)  				return EVENT_NONE; -			tok_size += BUFSIZ;  			i = 0;  		}  		ch = __read_char(); @@ -927,14 +939,7 @@ static enum event_type __read_token(char **tok)   out:  	buf[i] = 0; -	if (*tok) { -		*tok = realloc(*tok, tok_size + i); -		if (!*tok) -			return EVENT_NONE; -		strcat(*tok, buf); -	} else -		*tok = strdup(buf); -	if (!*tok) +	if (extend_token(tok, buf, tok_size + i + 1) < 0)  		return EVENT_NONE;  	if (type == EVENT_ITEM) { @@ -1255,9 +1260,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f  					field->flags |= FIELD_IS_POINTER;  				if (field->type) { -					field->type = realloc(field->type, -							      strlen(field->type) + -							      strlen(last_token) + 2); +					char *new_type; +					new_type = realloc(field->type, +							   strlen(field->type) + +							   strlen(last_token) + 2); +					if (!new_type) { +						free(last_token); +						goto fail; +					} +					field->type = new_type;  					strcat(field->type, " ");  					strcat(field->type, last_token);  					free(last_token); @@ -1282,6 +1293,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f  		if (strcmp(token, "[") == 0) {  			enum event_type last_type = type;  			char *brackets = token; +			char *new_brackets;  			int len;  			field->flags |= FIELD_IS_ARRAY; @@ -1301,9 +1313,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f  					len = 1;  				last_type = type; -				brackets = realloc(brackets, -						   strlen(brackets) + -						   strlen(token) + len); +				new_brackets = realloc(brackets, +						       strlen(brackets) + +						       strlen(token) + len); +				if (!new_brackets) { +					free(brackets); +					goto fail; +				} +				brackets = new_brackets;  				if (len == 2)  					strcat(brackets, " ");  				strcat(brackets, token); @@ -1319,7 +1336,12 @@ static int event_read_fields(struct event_format *event, struct format_field **f  			free_token(token); -			brackets = realloc(brackets, strlen(brackets) + 2); +			new_brackets = realloc(brackets, strlen(brackets) + 2); +			if (!new_brackets) { +				free(brackets); +				goto fail; +			} +			brackets = new_brackets;  			strcat(brackets, "]");  			/* add brackets to type */ @@ -1330,10 +1352,16 @@ static int event_read_fields(struct event_format *event, struct format_field **f  			 * the format: type [] item;  			 */  			if (type == EVENT_ITEM) { -				field->type = realloc(field->type, -						      strlen(field->type) + -						      strlen(field->name) + -						      strlen(brackets) + 2); +				char *new_type; +				new_type = realloc(field->type, +						   strlen(field->type) + +						   strlen(field->name) + +						   strlen(brackets) + 2); +				if (!new_type) { +					free(brackets); +					goto fail; +				} +				field->type = new_type;  				strcat(field->type, " ");  				strcat(field->type, 
field->name);  				free_token(field->name); @@ -1341,9 +1369,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f  				field->name = token;  				type = read_token(&token);  			} else { -				field->type = realloc(field->type, -						      strlen(field->type) + -						      strlen(brackets) + 1); +				char *new_type; +				new_type = realloc(field->type, +						   strlen(field->type) + +						   strlen(brackets) + 1); +				if (!new_type) { +					free(brackets); +					goto fail; +				} +				field->type = new_type;  				strcat(field->type, brackets);  			}  			free(brackets); @@ -1726,10 +1760,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)  		/* could just be a type pointer */  		if ((strcmp(arg->op.op, "*") == 0) &&  		    type == EVENT_DELIM && (strcmp(token, ")") == 0)) { +			char *new_atom; +  			if (left->type != PRINT_ATOM)  				die("bad pointer type"); -			left->atom.atom = realloc(left->atom.atom, +			new_atom = realloc(left->atom.atom,  					    strlen(left->atom.atom) + 3); +			if (!new_atom) +				goto out_free; + +			left->atom.atom = new_atom;  			strcat(left->atom.atom, " *");  			free(arg->op.op);  			*arg = *left; @@ -2146,6 +2186,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **  		if (value == NULL)  			goto out_free;  		field->value = strdup(value); +		if (field->value == NULL) +			goto out_free;  		free_arg(arg);  		arg = alloc_arg(); @@ -2159,6 +2201,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **  		if (value == NULL)  			goto out_free;  		field->str = strdup(value); +		if (field->str == NULL) +			goto out_free;  		free_arg(arg);  		arg = NULL; @@ -2260,6 +2304,45 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok)  }  static enum event_type +process_hex(struct event_format *event, struct print_arg *arg, char **tok) +{ +	struct print_arg *field; +	enum event_type type; +	char *token; + +	memset(arg, 0, sizeof(*arg)); +	arg->type = PRINT_HEX; + +	field = alloc_arg(); +	type = process_arg(event, field, &token); + +	if (test_type_token(type, token, EVENT_DELIM, ",")) +		goto out_free; + +	arg->hex.field = field; + +	free_token(token); + +	field = alloc_arg(); +	type = process_arg(event, field, &token); + +	if (test_type_token(type, token, EVENT_DELIM, ")")) +		goto out_free; + +	arg->hex.size = field; + +	free_token(token); +	type = read_token_item(tok); +	return type; + + out_free: +	free_arg(field); +	free_token(token); +	*tok = NULL; +	return EVENT_ERROR; +} + +static enum event_type  process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)  {  	struct format_field *field; @@ -2488,6 +2571,10 @@ process_function(struct event_format *event, struct print_arg *arg,  		is_symbolic_field = 1;  		return process_symbols(event, arg, tok);  	} +	if (strcmp(token, "__print_hex") == 0) { +		free_token(token); +		return process_hex(event, arg, tok); +	}  	if (strcmp(token, "__get_str") == 0) {  		free_token(token);  		return process_str(event, arg, tok); @@ -2541,7 +2628,16 @@ process_arg_token(struct event_format *event, struct print_arg *arg,  		}  		/* atoms can be more than one token long */  		while (type == EVENT_ITEM) { -			atom = realloc(atom, strlen(atom) + strlen(token) + 2); +			char *new_atom; +			new_atom = realloc(atom, +					   strlen(atom) + strlen(token) + 2); +			if (!new_atom) { +				free(atom); +				*tok = NULL; +				free_token(token); +				return EVENT_ERROR; +			} +			
atom = new_atom;  			strcat(atom, " ");  			strcat(atom, token);  			free_token(token); @@ -2835,7 +2931,7 @@ static int get_common_info(struct pevent *pevent,  	event = pevent->events[0];  	field = pevent_find_common_field(event, type);  	if (!field) -		die("field '%s' not found", type); +		return -1;  	*offset = field->offset;  	*size = field->size; @@ -2886,15 +2982,16 @@ static int parse_common_flags(struct pevent *pevent, void *data)  static int parse_common_lock_depth(struct pevent *pevent, void *data)  { -	int ret; - -	ret = __parse_common(pevent, data, -			     &pevent->ld_size, &pevent->ld_offset, -			     "common_lock_depth"); -	if (ret < 0) -		return -1; +	return __parse_common(pevent, data, +			      &pevent->ld_size, &pevent->ld_offset, +			      "common_lock_depth"); +} -	return ret; +static int parse_common_migrate_disable(struct pevent *pevent, void *data) +{ +	return __parse_common(pevent, data, +			      &pevent->ld_size, &pevent->ld_offset, +			      "common_migrate_disable");  }  static int events_id_cmp(const void *a, const void *b); @@ -2995,6 +3092,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg  		break;  	case PRINT_FLAGS:  	case PRINT_SYMBOL: +	case PRINT_HEX:  		break;  	case PRINT_TYPE:  		val = eval_num_arg(data, size, event, arg->typecast.item); @@ -3214,11 +3312,13 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  {  	struct pevent *pevent = event->pevent;  	struct print_flag_sym *flag; +	struct format_field *field;  	unsigned long long val, fval;  	unsigned long addr;  	char *str; +	unsigned char *hex;  	int print; -	int len; +	int i, len;  	switch (arg->type) {  	case PRINT_NULL: @@ -3228,27 +3328,29 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  		print_str_to_seq(s, format, len_arg, arg->atom.atom);  		return;  	case PRINT_FIELD: -		if (!arg->field.field) { -			arg->field.field = pevent_find_any_field(event, arg->field.name); -			if (!arg->field.field) +		field = arg->field.field; +		if (!field) { +			field = pevent_find_any_field(event, arg->field.name); +			if (!field)  				die("field %s not found", arg->field.name); +			arg->field.field = field;  		}  		/* Zero sized fields, mean the rest of the data */ -		len = arg->field.field->size ? : size - arg->field.field->offset; +		len = field->size ? : size - field->offset;  		/*  		 * Some events pass in pointers. If this is not an array  		 * and the size is the same as long_size, assume that it  		 * is a pointer.  		 
*/ -		if (!(arg->field.field->flags & FIELD_IS_ARRAY) && -		    arg->field.field->size == pevent->long_size) { -			addr = *(unsigned long *)(data + arg->field.field->offset); +		if (!(field->flags & FIELD_IS_ARRAY) && +		    field->size == pevent->long_size) { +			addr = *(unsigned long *)(data + field->offset);  			trace_seq_printf(s, "%lx", addr);  			break;  		}  		str = malloc_or_die(len + 1); -		memcpy(str, data + arg->field.field->offset, len); +		memcpy(str, data + field->offset, len);  		str[len] = 0;  		print_str_to_seq(s, format, len_arg, str);  		free(str); @@ -3281,6 +3383,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  			}  		}  		break; +	case PRINT_HEX: +		field = arg->hex.field->field.field; +		if (!field) { +			str = arg->hex.field->field.name; +			field = pevent_find_any_field(event, str); +			if (!field) +				die("field %s not found", str); +			arg->hex.field->field.field = field; +		} +		hex = data + field->offset; +		len = eval_num_arg(data, size, event, arg->hex.size); +		for (i = 0; i < len; i++) { +			if (i) +				trace_seq_putc(s, ' '); +			trace_seq_printf(s, "%02x", hex[i]); +		} +		break;  	case PRINT_TYPE:  		break; @@ -3299,7 +3418,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  		break;  	}  	case PRINT_BSTRING: -		trace_seq_printf(s, format, arg->string.string); +		print_str_to_seq(s, format, len_arg, arg->string.string);  		break;  	case PRINT_OP:  		/* @@ -3363,6 +3482,10 @@ process_defined_func(struct trace_seq *s, void *data, int size,  			string = malloc_or_die(sizeof(*string));  			string->next = strings;  			string->str = strdup(str.buffer); +			if (!string->str) +				die("malloc str"); + +			args[i] = (unsigned long long)string->str;  			strings = string;  			trace_seq_destroy(&str);  			break; @@ -3400,6 +3523,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc  	unsigned long long ip, val;  	char *ptr;  	void *bptr; +	int vsize;  	field = pevent->bprint_buf_field;  	ip_field = pevent->bprint_ip_field; @@ -3448,6 +3572,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc  				goto process_again;  			case '0' ... '9':  				goto process_again; +			case '.': +				goto process_again;  			case 'p':  				ls = 1;  				/* fall through */ @@ -3455,23 +3581,30 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc  			case 'u':  			case 'x':  			case 'i': -				/* the pointers are always 4 bytes aligned */ -				bptr = (void *)(((unsigned long)bptr + 3) & -						~3);  				switch (ls) {  				case 0: -					ls = 4; +					vsize = 4;  					break;  				case 1: -					ls = pevent->long_size; +					vsize = pevent->long_size;  					break;  				case 2: -					ls = 8; +					vsize = 8; +					break;  				default: +					vsize = ls; /* ? */  					break;  				} -				val = pevent_read_number(pevent, bptr, ls); -				bptr += ls; +			/* fall through */ +			case '*': +				if (*ptr == '*') +					vsize = 4; + +				/* the pointers are always 4 bytes aligned */ +				bptr = (void *)(((unsigned long)bptr + 3) & +						~3); +				val = pevent_read_number(pevent, bptr, vsize); +				bptr += vsize;  				arg = alloc_arg();  				arg->next = NULL;  				arg->type = PRINT_ATOM; @@ -3479,12 +3612,21 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc  				sprintf(arg->atom.atom, "%lld", val);  				*next = arg;  				next = &arg->next; +				/* +				 * The '*' case means that an arg is used as the length. 
+				 * We need to continue to figure out for what. +				 */ +				if (*ptr == '*') +					goto process_again; +  				break;  			case 's':  				arg = alloc_arg();  				arg->next = NULL;  				arg->type = PRINT_BSTRING;  				arg->string.string = strdup(bptr); +				if (!arg->string.string) +					break;  				bptr += strlen(bptr) + 1;  				*next = arg;  				next = &arg->next; @@ -3589,6 +3731,16 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,  	trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);  } +static int is_printable_array(char *p, unsigned int len) +{ +	unsigned int i; + +	for (i = 0; i < len && p[i]; i++) +		if (!isprint(p[i])) +		    return 0; +	return 1; +} +  static void print_event_fields(struct trace_seq *s, void *data, int size,  			       struct event_format *event)  { @@ -3608,7 +3760,8 @@ static void print_event_fields(struct trace_seq *s, void *data, int size,  				len = offset >> 16;  				offset &= 0xffff;  			} -			if (field->flags & FIELD_IS_STRING) { +			if (field->flags & FIELD_IS_STRING && +			    is_printable_array(data + offset, len)) {  				trace_seq_printf(s, "%s", (char *)data + offset);  			} else {  				trace_seq_puts(s, "ARRAY["); @@ -3619,6 +3772,7 @@ static void print_event_fields(struct trace_seq *s, void *data, int size,  							 *((unsigned char *)data + offset + i));  				}  				trace_seq_putc(s, ']'); +				field->flags &= ~FIELD_IS_STRING;  			}  		} else {  			val = pevent_read_number(event->pevent, data + field->offset, @@ -3758,6 +3912,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event  				} else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') {  					print_mac_arg(s, *(ptr+1), data, size, event, arg);  					ptr++; +					arg = arg->next;  					break;  				} @@ -3794,14 +3949,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event  						break;  					}  				} -				if (pevent->long_size == 8 && ls) { +				if (pevent->long_size == 8 && ls && +				    sizeof(long) != 8) {  					char *p;  					ls = 2;  					/* make %l into %ll */  					p = strchr(format, 'l');  					if (p) -						memmove(p, p+1, strlen(p)+1); +						memmove(p+1, p, strlen(p)+1);  					else if (strcmp(format, "%p") == 0)  						strcpy(format, "0x%llx");  				} @@ -3878,8 +4034,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event   * pevent_data_lat_fmt - parse the data for the latency format   * @pevent: a handle to the pevent   * @s: the trace_seq to write to - * @data: the raw data to read from - * @size: currently unused. 
+ * @record: the record to read from   *   * This parses out the Latency format (interrupts disabled,   * need rescheduling, in hard/soft interrupt, preempt count @@ -3889,10 +4044,13 @@ void pevent_data_lat_fmt(struct pevent *pevent,  			 struct trace_seq *s, struct pevent_record *record)  {  	static int check_lock_depth = 1; +	static int check_migrate_disable = 1;  	static int lock_depth_exists; +	static int migrate_disable_exists;  	unsigned int lat_flags;  	unsigned int pc;  	int lock_depth; +	int migrate_disable;  	int hardirq;  	int softirq;  	void *data = record->data; @@ -3900,18 +4058,26 @@ void pevent_data_lat_fmt(struct pevent *pevent,  	lat_flags = parse_common_flags(pevent, data);  	pc = parse_common_pc(pevent, data);  	/* lock_depth may not always exist */ -	if (check_lock_depth) { -		struct format_field *field; -		struct event_format *event; - -		check_lock_depth = 0; -		event = pevent->events[0]; -		field = pevent_find_common_field(event, "common_lock_depth"); -		if (field) -			lock_depth_exists = 1; -	}  	if (lock_depth_exists)  		lock_depth = parse_common_lock_depth(pevent, data); +	else if (check_lock_depth) { +		lock_depth = parse_common_lock_depth(pevent, data); +		if (lock_depth < 0) +			check_lock_depth = 0; +		else +			lock_depth_exists = 1; +	} + +	/* migrate_disable may not always exist */ +	if (migrate_disable_exists) +		migrate_disable = parse_common_migrate_disable(pevent, data); +	else if (check_migrate_disable) { +		migrate_disable = parse_common_migrate_disable(pevent, data); +		if (migrate_disable < 0) +			check_migrate_disable = 0; +		else +			migrate_disable_exists = 1; +	}  	hardirq = lat_flags & TRACE_FLAG_HARDIRQ;  	softirq = lat_flags & TRACE_FLAG_SOFTIRQ; @@ -3930,6 +4096,13 @@ void pevent_data_lat_fmt(struct pevent *pevent,  	else  		trace_seq_putc(s, '.'); +	if (migrate_disable_exists) { +		if (migrate_disable < 0) +			trace_seq_putc(s, '.'); +		else +			trace_seq_printf(s, "%d", migrate_disable); +	} +  	if (lock_depth_exists) {  		if (lock_depth < 0)  			trace_seq_putc(s, '.'); @@ -3996,10 +4169,7 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid)   * pevent_data_comm_from_pid - parse the data into the print format   * @s: the trace_seq to write to   * @event: the handle to the event - * @cpu: the cpu the event was recorded on - * @data: the raw data - * @size: the size of the raw data - * @nsecs: the timestamp of the event + * @record: the record to read from   *   * This parses the raw @data using the given @event information and   * writes the print format into the trace_seq. 
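Besides the latency-format and kerneldoc fixes above, much of this event-parse.c change is __print_hex() support; the PRINT_HEX case added to print_str_arg() earlier (and to print_args() just below) renders a field as space-separated hex bytes. A self-contained sketch of that output loop, using made-up sample data:

#include <stdio.h>

/* Each byte becomes two hex digits, separated by single spaces, with
 * no trailing separator -- the same loop shape as the PRINT_HEX case
 * in print_str_arg(), which writes into a trace_seq instead of stdout.
 */
static void print_hex(const unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (i)
			putchar(' ');
		printf("%02x", buf[i]);
	}
	putchar('\n');
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };

	print_hex(mac, sizeof(mac));	/* prints: 00 1b 21 aa bb cc */
	return 0;
}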
@@ -4279,6 +4449,13 @@ static void print_args(struct print_arg *args)  		trace_seq_destroy(&s);  		printf(")");  		break; +	case PRINT_HEX: +		printf("__print_hex("); +		print_args(args->hex.field); +		printf(", "); +		print_args(args->hex.size); +		printf(")"); +		break;  	case PRINT_STRING:  	case PRINT_BSTRING:  		printf("__get_str(%s)", args->string.string); @@ -4541,6 +4718,8 @@ int pevent_parse_event(struct pevent *pevent,  		die("failed to read event id");  	event->system = strdup(sys); +	if (!event->system) +		die("failed to allocate system");  	/* Add pevent to event so that it can be referenced */  	event->pevent = pevent; @@ -4582,6 +4761,11 @@ int pevent_parse_event(struct pevent *pevent,  			list = &arg->next;  			arg->type = PRINT_FIELD;  			arg->field.name = strdup(field->name); +			if (!arg->field.name) { +				do_warning("failed to allocate field name"); +				event->flags |= EVENT_FL_FAILED; +				return -1; +			}  			arg->field.field = field;  		}  		return 0; @@ -4753,7 +4937,7 @@ int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,   * @record: The record with the field name.   * @err: print default error if failed.   * - * Returns: 0 on success, -1 field not fould, or 1 if buffer is full. + * Returns: 0 on success, -1 field not found, or 1 if buffer is full.   */  int pevent_print_num_field(struct trace_seq *s, const char *fmt,  			   struct event_format *event, const char *name, @@ -4795,11 +4979,12 @@ static void free_func_handle(struct pevent_function_handler *func)   * pevent_register_print_function - register a helper function   * @pevent: the handle to the pevent   * @func: the function to process the helper function + * @ret_type: the return type of the helper function   * @name: the name of the helper function   * @parameters: A list of enum pevent_func_arg_type   *   * Some events may have helper functions in the print format arguments. - * This allows a plugin to dynmically create a way to process one + * This allows a plugin to dynamically create a way to process one   * of these functions.   *   * The @parameters is a variable list of pevent_func_arg_type enums that @@ -4870,12 +5055,13 @@ int pevent_register_print_function(struct pevent *pevent,  }  /** - * pevent_register_event_handle - register a way to parse an event + * pevent_register_event_handler - register a way to parse an event   * @pevent: the handle to the pevent   * @id: the id of the event to register   * @sys_name: the system name the event belongs to   * @event_name: the name of the event   * @func: the function to call to parse the event information + * @context: the data to be passed to @func   *   * This function allows a developer to override the parsing of   * a given event. 
If for some reason the default print format @@ -4925,6 +5111,11 @@ int pevent_register_event_handler(struct pevent *pevent,  	if (sys_name)  		handle->sys_name = strdup(sys_name); +	if ((event_name && !handle->event_name) || +	    (sys_name && !handle->sys_name)) { +		die("Failed to allocate event/sys name"); +	} +  	handle->func = func;  	handle->next = pevent->handlers;  	pevent->handlers = handle; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index ac997bc7b59..5772ad8cb38 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -226,6 +226,11 @@ struct print_arg_symbol {  	struct print_flag_sym	*symbols;  }; +struct print_arg_hex { +	struct print_arg	*field; +	struct print_arg	*size; +}; +  struct print_arg_dynarray {  	struct format_field	*field;  	struct print_arg	*index; @@ -253,6 +258,7 @@ enum print_arg_type {  	PRINT_FIELD,  	PRINT_FLAGS,  	PRINT_SYMBOL, +	PRINT_HEX,  	PRINT_TYPE,  	PRINT_STRING,  	PRINT_BSTRING, @@ -270,6 +276,7 @@ struct print_arg {  		struct print_arg_typecast	typecast;  		struct print_arg_flags		flags;  		struct print_arg_symbol		symbol; +		struct print_arg_hex		hex;  		struct print_arg_func		func;  		struct print_arg_string		string;  		struct print_arg_op		op; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index dfcfe2c131d..ad17855528f 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -96,7 +96,7 @@ static enum event_type read_token(char **tok)  	    (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&  	    pevent_peek_char() == '~') {  		/* append it */ -		*tok = malloc(3); +		*tok = malloc_or_die(3);  		sprintf(*tok, "%c%c", *token, '~');  		free_token(token);  		/* Now remove the '~' from the buffer */ @@ -148,17 +148,11 @@ add_filter_type(struct event_filter *filter, int id)  	if (filter_type)  		return filter_type; -	if (!filter->filters) -		filter->event_filters = -			malloc_or_die(sizeof(*filter->event_filters)); -	else { -		filter->event_filters = -			realloc(filter->event_filters, -				sizeof(*filter->event_filters) * -				(filter->filters + 1)); -		if (!filter->event_filters) -			die("Could not allocate filter"); -	} +	filter->event_filters =	realloc(filter->event_filters, +					sizeof(*filter->event_filters) * +					(filter->filters + 1)); +	if (!filter->event_filters) +		die("Could not allocate filter");  	for (i = 0; i < filter->filters; i++) {  		if (filter->event_filters[i].event_id > id) @@ -1480,7 +1474,7 @@ void pevent_filter_clear_trivial(struct event_filter *filter,  {  	struct filter_type *filter_type;  	int count = 0; -	int *ids; +	int *ids = NULL;  	int i;  	if (!filter->filters) @@ -1504,10 +1498,8 @@ void pevent_filter_clear_trivial(struct event_filter *filter,  		default:  			break;  		} -		if (count) -			ids = realloc(ids, sizeof(*ids) * (count + 1)); -		else -			ids = malloc(sizeof(*ids)); + +		ids = realloc(ids, sizeof(*ids) * (count + 1));  		if (!ids)  			die("Can't allocate ids");  		ids[count++] = filter_type->event_id; @@ -1710,18 +1702,43 @@ static int test_num(struct event_format *event,  static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record)  { -	const char *val = record->data + arg->str.field->offset; +	struct event_format *event; +	struct pevent *pevent; +	unsigned long long addr; +	const char *val = NULL; +	char hex[64]; -	/* -	 * We need to copy the data since we can't be sure the field -	 * is null terminated. 
-	 */ -	if (*(val + arg->str.field->size - 1)) { -		/* copy it */ -		memcpy(arg->str.buffer, val, arg->str.field->size); -		/* the buffer is already NULL terminated */ -		val = arg->str.buffer; +	/* If the field is not a string convert it */ +	if (arg->str.field->flags & FIELD_IS_STRING) { +		val = record->data + arg->str.field->offset; + +		/* +		 * We need to copy the data since we can't be sure the field +		 * is null terminated. +		 */ +		if (*(val + arg->str.field->size - 1)) { +			/* copy it */ +			memcpy(arg->str.buffer, val, arg->str.field->size); +			/* the buffer is already NULL terminated */ +			val = arg->str.buffer; +		} + +	} else { +		event = arg->str.field->event; +		pevent = event->pevent; +		addr = get_value(event, arg->str.field, record); + +		if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG)) +			/* convert to a kernel symbol */ +			val = pevent_find_function(pevent, addr); + +		if (val == NULL) { +			/* just use the hex of the string name */ +			snprintf(hex, 64, "0x%llx", addr); +			val = hex; +		}  	} +  	return val;  } @@ -2001,11 +2018,13 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)  	char *lstr;  	char *rstr;  	char *op; -	char *str; +	char *str = NULL;  	int len;  	lstr = arg_to_str(filter, arg->exp.left);  	rstr = arg_to_str(filter, arg->exp.right); +	if (!lstr || !rstr) +		goto out;  	switch (arg->exp.type) {  	case FILTER_EXP_ADD: @@ -2045,6 +2064,7 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)  	len = strlen(op) + strlen(lstr) + strlen(rstr) + 4;  	str = malloc_or_die(len);  	snprintf(str, len, "%s %s %s", lstr, op, rstr); +out:  	free(lstr);  	free(rstr); @@ -2061,6 +2081,8 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)  	lstr = arg_to_str(filter, arg->num.left);  	rstr = arg_to_str(filter, arg->num.right); +	if (!lstr || !rstr) +		goto out;  	switch (arg->num.type) {  	case FILTER_CMP_EQ: @@ -2097,6 +2119,7 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)  		break;  	} +out:  	free(lstr);  	free(rstr);  	return str; @@ -2247,7 +2270,12 @@ int pevent_filter_compare(struct event_filter *filter1, struct event_filter *fil  		/* The best way to compare complex filters is with strings */  		str1 = arg_to_str(filter1, filter_type1->filter);  		str2 = arg_to_str(filter2, filter_type2->filter); -		result = strcmp(str1, str2) != 0; +		if (str1 && str2) +			result = strcmp(str1, str2) != 0; +		else +			/* bail out if allocation fails */ +			result = 1; +  		free(str1);  		free(str2);  		if (result) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index a3dbadb26ef..7065cd6fbdf 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -12,7 +12,7 @@ SYNOPSIS  DESCRIPTION  ----------- -This 'perf bench' command is general framework for benchmark suites. +This 'perf bench' command is a general framework for benchmark suites.  COMMON OPTIONS  -------------- @@ -45,14 +45,20 @@ SUBSYSTEM  'sched'::  	Scheduler and IPC mechanisms. +'mem':: +	Memory access performance. + +'all':: +	All benchmark subsystems. +  SUITES FOR 'sched'  ~~~~~~~~~~~~~~~~~~  *messaging*::  Suite for evaluating performance of scheduler and IPC mechanisms.  Based on hackbench by Rusty Russell. 
-Options of *pipe* -^^^^^^^^^^^^^^^^^ +Options of *messaging* +^^^^^^^^^^^^^^^^^^^^^^  -p::  --pipe::  Use pipe() instead of socketpair() @@ -115,6 +121,72 @@ Example of *pipe*                  59004 ops/sec  --------------------- +SUITES FOR 'mem' +~~~~~~~~~~~~~~~~ +*memcpy*:: +Suite for evaluating performance of simple memory copy in various ways. + +Options of *memcpy* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to copy (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to copy (default: default). +Available routines depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. + +-i:: +--iterations:: +Repeat memcpy invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of the gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memcpy. + +-n:: +--no-prefault:: +Show only the result without page faults before memcpy. + +*memset*:: +Suite for evaluating performance of simple memory set in various ways. + +Options of *memset* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to set (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to set (default: default). +Available routines depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. + +-i:: +--iterations:: +Repeat memset invocation this number of times. + +-c:: +--cycle:: +Use perf's cpu-cycles event instead of the gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memset. + +-n:: +--no-prefault:: +Show only the result without page faults before memset. +  SEE ALSO  --------  linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 2d89f02719b..495210a612c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -57,7 +57,7 @@ OPTIONS  -s::  --sort=:: -	Sort by key(s): pid, comm, dso, symbol, parent. +	Sort by key(s): pid, comm, dso, symbol, parent, srcline.  -p::  --parent=<regex>:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4a5680cb242..5b80d84d6b4 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -112,7 +112,7 @@ Default is to monitor all CPUS.  -s::  --sort:: -	Sort by key(s): pid, comm, dso, symbol, parent +	Sort by key(s): pid, comm, dso, symbol, parent, srcline.  
-n::  --show-nr-samples:: diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0eee64cfe9a..75d74e5db8d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -155,7 +155,7 @@ endif  ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)/util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE  BASIC_LDFLAGS =  # Guard against environment variables @@ -503,6 +503,7 @@ else  		LIB_OBJS += $(OUTPUT)ui/progress.o  		LIB_OBJS += $(OUTPUT)ui/util.o  		LIB_OBJS += $(OUTPUT)ui/tui/setup.o +		LIB_OBJS += $(OUTPUT)ui/tui/util.o  		LIB_H += ui/browser.h  		LIB_H += ui/browsers/map.h  		LIB_H += ui/helpline.h @@ -522,13 +523,18 @@ else  		msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);  		BASIC_CFLAGS += -DNO_GTK2_SUPPORT  	else +		ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) +			BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR +		endif  		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0)  		EXTLIBS += $(shell pkg-config --libs gtk+-2.0)  		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o  		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o +		LIB_OBJS += $(OUTPUT)ui/gtk/util.o  		# Make sure that it'd be included only once.  		ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),)  			LIB_OBJS += $(OUTPUT)ui/setup.o +			LIB_OBJS += $(OUTPUT)ui/util.o  		endif  	endif  endif diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 71557225bf9..02dad5d3359 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -24,21 +24,21 @@  static const char	*length_str	= "1MB";  static const char	*routine	= "default";  static int		iterations	= 1; -static bool		use_clock; -static int		clock_fd; +static bool		use_cycle; +static int		cycle_fd;  static bool		only_prefault;  static bool		no_prefault;  static const struct option options[] = {  	OPT_STRING('l', "length", &length_str, "1MB",  		    "Specify length of memory to copy. 
" -		    "available unit: B, MB, GB (upper and lower)"), +		    "Available units: B, KB, MB, GB and TB (upper and lower)"),  	OPT_STRING('r', "routine", &routine, "default",  		    "Specify routine to copy"),  	OPT_INTEGER('i', "iterations", &iterations,  		    "repeat memcpy() invocation this number of times"), -	OPT_BOOLEAN('c', "clock", &use_clock, -		    "Use CPU clock for measuring"), +	OPT_BOOLEAN('c', "cycle", &use_cycle, +		    "Use cycles event instead of gettimeofday() for measuring"),  	OPT_BOOLEAN('o', "only-prefault", &only_prefault,  		    "Show only the result with page faults before memcpy()"),  	OPT_BOOLEAN('n', "no-prefault", &no_prefault, @@ -76,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = {  	NULL  }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = {  	.type		= PERF_TYPE_HARDWARE,  	.config		= PERF_COUNT_HW_CPU_CYCLES  }; -static void init_clock(void) +static void init_cycle(void)  { -	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); +	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); -	if (clock_fd < 0 && errno == ENOSYS) +	if (cycle_fd < 0 && errno == ENOSYS)  		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");  	else -		BUG_ON(clock_fd < 0); +		BUG_ON(cycle_fd < 0);  } -static u64 get_clock(void) +static u64 get_cycle(void)  {  	int ret;  	u64 clk; -	ret = read(clock_fd, &clk, sizeof(u64)); +	ret = read(cycle_fd, &clk, sizeof(u64));  	BUG_ON(ret != sizeof(u64));  	return clk; @@ -119,9 +119,9 @@ static void alloc_mem(void **dst, void **src, size_t length)  		die("memory allocation failed - maybe length is too large?\n");  } -static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) +static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)  { -	u64 clock_start = 0ULL, clock_end = 0ULL; +	u64 cycle_start = 0ULL, cycle_end = 0ULL;  	void *src = NULL, *dst = NULL;  	int i; @@ -130,14 +130,14 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)  	if (prefault)  		fn(dst, src, len); -	clock_start = get_clock(); +	cycle_start = get_cycle();  	for (i = 0; i < iterations; ++i)  		fn(dst, src, len); -	clock_end = get_clock(); +	cycle_end = get_cycle();  	free(src);  	free(dst); -	return clock_end - clock_start; +	return cycle_end - cycle_start;  }  static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) @@ -182,17 +182,17 @@ int bench_mem_memcpy(int argc, const char **argv,  	int i;  	size_t len;  	double result_bps[2]; -	u64 result_clock[2]; +	u64 result_cycle[2];  	argc = parse_options(argc, argv, options,  			     bench_mem_memcpy_usage, 0); -	if (use_clock) -		init_clock(); +	if (use_cycle) +		init_cycle();  	len = (size_t)perf_atoll((char *)length_str); -	result_clock[0] = result_clock[1] = 0ULL; +	result_cycle[0] = result_cycle[1] = 0ULL;  	result_bps[0] = result_bps[1] = 0.0;  	if ((s64)len <= 0) { @@ -223,11 +223,11 @@ int bench_mem_memcpy(int argc, const char **argv,  	if (!only_prefault && !no_prefault) {  		/* show both of results */ -		if (use_clock) { -			result_clock[0] = -				do_memcpy_clock(routines[i].fn, len, false); -			result_clock[1] = -				do_memcpy_clock(routines[i].fn, len, true); +		if (use_cycle) { +			result_cycle[0] = +				do_memcpy_cycle(routines[i].fn, len, false); +			result_cycle[1] = +				do_memcpy_cycle(routines[i].fn, len, true);  		} else {  			result_bps[0] =  				do_memcpy_gettimeofday(routines[i].fn, @@ -237,9 +237,9 @@ int bench_mem_memcpy(int argc, const char **argv,  						
len, true);  		}  	} else { -		if (use_clock) { -			result_clock[pf] = -				do_memcpy_clock(routines[i].fn, +		if (use_cycle) { +			result_cycle[pf] = +				do_memcpy_cycle(routines[i].fn,  						len, only_prefault);  		} else {  			result_bps[pf] = @@ -251,12 +251,12 @@ int bench_mem_memcpy(int argc, const char **argv,  	switch (bench_format) {  	case BENCH_FORMAT_DEFAULT:  		if (!only_prefault && !no_prefault) { -			if (use_clock) { -				printf(" %14lf Clock/Byte\n", -					(double)result_clock[0] +			if (use_cycle) { +				printf(" %14lf Cycle/Byte\n", +					(double)result_cycle[0]  					/ (double)len); -				printf(" %14lf Clock/Byte (with prefault)\n", -					(double)result_clock[1] +				printf(" %14lf Cycle/Byte (with prefault)\n", +					(double)result_cycle[1]  					/ (double)len);  			} else {  				print_bps(result_bps[0]); @@ -265,9 +265,9 @@ int bench_mem_memcpy(int argc, const char **argv,  				printf(" (with prefault)\n");  			}  		} else { -			if (use_clock) { -				printf(" %14lf Clock/Byte", -					(double)result_clock[pf] +			if (use_cycle) { +				printf(" %14lf Cycle/Byte", +					(double)result_cycle[pf]  					/ (double)len);  			} else  				print_bps(result_bps[pf]); @@ -277,17 +277,17 @@ int bench_mem_memcpy(int argc, const char **argv,  		break;  	case BENCH_FORMAT_SIMPLE:  		if (!only_prefault && !no_prefault) { -			if (use_clock) { +			if (use_cycle) {  				printf("%lf %lf\n", -					(double)result_clock[0] / (double)len, -					(double)result_clock[1] / (double)len); +					(double)result_cycle[0] / (double)len, +					(double)result_cycle[1] / (double)len);  			} else {  				printf("%lf %lf\n",  					result_bps[0], result_bps[1]);  			}  		} else { -			if (use_clock) { -				printf("%lf\n", (double)result_clock[pf] +			if (use_cycle) { +				printf("%lf\n", (double)result_cycle[pf]  					/ (double)len);  			} else  				printf("%lf\n", result_bps[pf]); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index e9079185bd7..350cc955726 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -24,21 +24,21 @@  static const char	*length_str	= "1MB";  static const char	*routine	= "default";  static int		iterations	= 1; -static bool		use_clock; -static int		clock_fd; +static bool		use_cycle; +static int		cycle_fd;  static bool		only_prefault;  static bool		no_prefault;  static const struct option options[] = {  	OPT_STRING('l', "length", &length_str, "1MB", -		    "Specify length of memory to copy. " -		    "available unit: B, MB, GB (upper and lower)"), +		    "Specify length of memory to set. 
" +		    "Available units: B, KB, MB, GB and TB (upper and lower)"),  	OPT_STRING('r', "routine", &routine, "default", -		    "Specify routine to copy"), +		    "Specify routine to set"),  	OPT_INTEGER('i', "iterations", &iterations,  		    "repeat memset() invocation this number of times"), -	OPT_BOOLEAN('c', "clock", &use_clock, -		    "Use CPU clock for measuring"), +	OPT_BOOLEAN('c', "cycle", &use_cycle, +		    "Use cycles event instead of gettimeofday() for measuring"),  	OPT_BOOLEAN('o', "only-prefault", &only_prefault,  		    "Show only the result with page faults before memset()"),  	OPT_BOOLEAN('n', "no-prefault", &no_prefault, @@ -76,27 +76,27 @@ static const char * const bench_mem_memset_usage[] = {  	NULL  }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = {  	.type		= PERF_TYPE_HARDWARE,  	.config		= PERF_COUNT_HW_CPU_CYCLES  }; -static void init_clock(void) +static void init_cycle(void)  { -	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); +	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); -	if (clock_fd < 0 && errno == ENOSYS) +	if (cycle_fd < 0 && errno == ENOSYS)  		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");  	else -		BUG_ON(clock_fd < 0); +		BUG_ON(cycle_fd < 0);  } -static u64 get_clock(void) +static u64 get_cycle(void)  {  	int ret;  	u64 clk; -	ret = read(clock_fd, &clk, sizeof(u64)); +	ret = read(cycle_fd, &clk, sizeof(u64));  	BUG_ON(ret != sizeof(u64));  	return clk; @@ -115,9 +115,9 @@ static void alloc_mem(void **dst, size_t length)  		die("memory allocation failed - maybe length is too large?\n");  } -static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)  { -	u64 clock_start = 0ULL, clock_end = 0ULL; +	u64 cycle_start = 0ULL, cycle_end = 0ULL;  	void *dst = NULL;  	int i; @@ -126,13 +126,13 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)  	if (prefault)  		fn(dst, -1, len); -	clock_start = get_clock(); +	cycle_start = get_cycle();  	for (i = 0; i < iterations; ++i)  		fn(dst, i, len); -	clock_end = get_clock(); +	cycle_end = get_cycle();  	free(dst); -	return clock_end - clock_start; +	return cycle_end - cycle_start;  }  static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) @@ -176,17 +176,17 @@ int bench_mem_memset(int argc, const char **argv,  	int i;  	size_t len;  	double result_bps[2]; -	u64 result_clock[2]; +	u64 result_cycle[2];  	argc = parse_options(argc, argv, options,  			     bench_mem_memset_usage, 0); -	if (use_clock) -		init_clock(); +	if (use_cycle) +		init_cycle();  	len = (size_t)perf_atoll((char *)length_str); -	result_clock[0] = result_clock[1] = 0ULL; +	result_cycle[0] = result_cycle[1] = 0ULL;  	result_bps[0] = result_bps[1] = 0.0;  	if ((s64)len <= 0) { @@ -217,11 +217,11 @@ int bench_mem_memset(int argc, const char **argv,  	if (!only_prefault && !no_prefault) {  		/* show both of results */ -		if (use_clock) { -			result_clock[0] = -				do_memset_clock(routines[i].fn, len, false); -			result_clock[1] = -				do_memset_clock(routines[i].fn, len, true); +		if (use_cycle) { +			result_cycle[0] = +				do_memset_cycle(routines[i].fn, len, false); +			result_cycle[1] = +				do_memset_cycle(routines[i].fn, len, true);  		} else {  			result_bps[0] =  				do_memset_gettimeofday(routines[i].fn, @@ -231,9 +231,9 @@ int bench_mem_memset(int argc, const char **argv,  						len, true);  		}  	} else { -		if (use_clock) { -			
result_clock[pf] = -				do_memset_clock(routines[i].fn, +		if (use_cycle) { +			result_cycle[pf] = +				do_memset_cycle(routines[i].fn,  						len, only_prefault);  		} else {  			result_bps[pf] = @@ -245,12 +245,12 @@ int bench_mem_memset(int argc, const char **argv,  	switch (bench_format) {  	case BENCH_FORMAT_DEFAULT:  		if (!only_prefault && !no_prefault) { -			if (use_clock) { -				printf(" %14lf Clock/Byte\n", -					(double)result_clock[0] +			if (use_cycle) { +				printf(" %14lf Cycle/Byte\n", +					(double)result_cycle[0]  					/ (double)len); -				printf(" %14lf Clock/Byte (with prefault)\n ", -					(double)result_clock[1] +				printf(" %14lf Cycle/Byte (with prefault)\n ", +					(double)result_cycle[1]  					/ (double)len);  			} else {  				print_bps(result_bps[0]); @@ -259,9 +259,9 @@ int bench_mem_memset(int argc, const char **argv,  				printf(" (with prefault)\n");  			}  		} else { -			if (use_clock) { -				printf(" %14lf Clock/Byte", -					(double)result_clock[pf] +			if (use_cycle) { +				printf(" %14lf Cycle/Byte", +					(double)result_cycle[pf]  					/ (double)len);  			} else  				print_bps(result_bps[pf]); @@ -271,17 +271,17 @@ int bench_mem_memset(int argc, const char **argv,  		break;  	case BENCH_FORMAT_SIMPLE:  		if (!only_prefault && !no_prefault) { -			if (use_clock) { +			if (use_cycle) {  				printf("%lf %lf\n", -					(double)result_clock[0] / (double)len, -					(double)result_clock[1] / (double)len); +					(double)result_cycle[0] / (double)len, +					(double)result_cycle[1] / (double)len);  			} else {  				printf("%lf %lf\n",  					result_bps[0], result_bps[1]);  			}  		} else { -			if (use_clock) { -				printf("%lf\n", (double)result_clock[pf] +			if (use_cycle) { +				printf("%lf\n", (double)result_cycle[pf]  					/ (double)len);  			} else  				printf("%lf\n", result_bps[pf]); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b0e74ab2d7a..1f310021644 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -33,7 +33,7 @@ struct bench_suite {  };  						\  /* sentinel: easy for help */ -#define suite_all { "all", "test all suite (pseudo suite)", NULL } +#define suite_all { "all", "Test all benchmark suites", NULL }  static struct bench_suite sched_suites[] = {  	{ "messaging", @@ -75,7 +75,7 @@ static struct bench_subsys subsystems[] = {  	  "memory access performance",  	  mem_suites },  	{ "all",		/* sentinel: easy for help */ -	  "test all subsystem (pseudo subsystem)", +	  "all benchmark subsystem",  	  NULL },  	{ NULL,  	  NULL, diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index acd78dc2834..0dd5a058f76 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -60,7 +60,7 @@ static int __cmd_evlist(const char *input_name, struct perf_attr_details *detail  	list_for_each_entry(pos, &session->evlist->entries, node) {  		bool first = true; -		printf("%s", event_name(pos)); +		printf("%s", perf_evsel__name(pos));  		if (details->verbose || details->freq) {  			comma_printf(&first, " sample_freq=%" PRIu64, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 547af48deb4..ce35015f2dc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -57,6 +57,11 @@ static unsigned long nr_allocs, nr_cross_allocs;  #define PATH_SYS_NODE	"/sys/devices/system/node" +struct perf_kmem { +	struct perf_tool    tool; +	struct perf_session *session; +}; +  static void init_cpunode_map(void)  {  	FILE *fp; @@ -278,14 +283,16 @@ static void 
process_free_event(void *data,  	s_alloc->alloc_cpu = -1;  } -static void process_raw_event(union perf_event *raw_event __used, void *data, +static void process_raw_event(struct perf_tool *tool, +			      union perf_event *raw_event __used, void *data,  			      int cpu, u64 timestamp, struct thread *thread)  { +	struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);  	struct event_format *event;  	int type; -	type = trace_parse_common_type(data); -	event = trace_find_event(type); +	type = trace_parse_common_type(kmem->session->pevent, data); +	event = pevent_find_event(kmem->session->pevent, type);  	if (!strcmp(event->name, "kmalloc") ||  	    !strcmp(event->name, "kmem_cache_alloc")) { @@ -306,7 +313,7 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,  	}  } -static int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool,  				union perf_event *event,  				struct perf_sample *sample,  				struct perf_evsel *evsel __used, @@ -322,16 +329,18 @@ static int process_sample_event(struct perf_tool *tool __used,  	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); -	process_raw_event(event, sample->raw_data, sample->cpu, +	process_raw_event(tool, event, sample->raw_data, sample->cpu,  			  sample->time, thread);  	return 0;  } -static struct perf_tool perf_kmem = { -	.sample			= process_sample_event, -	.comm			= perf_event__process_comm, -	.ordered_samples	= true, +static struct perf_kmem perf_kmem = { +	.tool = { +		.sample			= process_sample_event, +		.comm			= perf_event__process_comm, +		.ordered_samples	= true, +	},  };  static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -486,11 +495,15 @@ static void sort_result(void)  static int __cmd_kmem(void)  {  	int err = -EINVAL; -	struct perf_session *session = perf_session__new(input_name, O_RDONLY, -							 0, false, &perf_kmem); +	struct perf_session *session; + +	session = perf_session__new(input_name, O_RDONLY, 0, false, +				    &perf_kmem.tool);  	if (session == NULL)  		return -ENOMEM; +	perf_kmem.session = session; +  	if (perf_session__create_kernel_maps(session) < 0)  		goto out_delete; @@ -498,7 +511,7 @@ static int __cmd_kmem(void)  		goto out_delete;  	setup_pager(); -	err = perf_session__process_events(session, &perf_kmem); +	err = perf_session__process_events(session, &perf_kmem.tool);  	if (err != 0)  		goto out_delete;  	sort_result(); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index fd53319de20..b3c42854886 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -724,8 +724,8 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)  	struct event_format *event;  	int type; -	type = trace_parse_common_type(data); -	event = trace_find_event(type); +	type = trace_parse_common_type(session->pevent, data); +	event = pevent_find_event(session->pevent, type);  	if (!strcmp(event->name, "lock_acquire"))  		process_lock_acquire_event(data, event, cpu, timestamp, thread); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f95840d04e4..f5a6452931e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -265,7 +265,7 @@ try_again:  			if (err == ENOENT) {  				ui__error("The %s event is not supported.\n", -					    event_name(pos)); +					  perf_evsel__name(pos));  				exit(EXIT_FAILURE);  			} @@ -916,7 +916,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)  		
usage_with_options(record_usage, record_options);  	list_for_each_entry(pos, &evsel_list->entries, node) { -		if (perf_header__push_event(pos->attr.config, event_name(pos))) +		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))  			goto out_free_fd;  	} diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25249f76329..69b1c118515 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -69,7 +69,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,  	if ((sort__has_parent || symbol_conf.use_callchain)  	    && sample->callchain) { -		err = machine__resolve_callchain(machine, evsel, al->thread, +		err = machine__resolve_callchain(machine, al->thread,  						 sample->callchain, &parent);  		if (err)  			return err; @@ -140,7 +140,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,  	struct hist_entry *he;  	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { -		err = machine__resolve_callchain(machine, evsel, al->thread, +		err = machine__resolve_callchain(machine, al->thread,  						 sample->callchain, &parent);  		if (err)  			return err; @@ -230,7 +230,7 @@ static int process_read_event(struct perf_tool *tool,  	struct perf_report *rep = container_of(tool, struct perf_report, tool);  	if (rep->show_threads) { -		const char *name = evsel ? event_name(evsel) : "unknown"; +		const char *name = evsel ? perf_evsel__name(evsel) : "unknown";  		perf_read_values_add_value(&rep->show_threads_values,  					   event->read.pid, event->read.tid,  					   event->read.id, @@ -239,17 +239,18 @@ static int process_read_event(struct perf_tool *tool,  	}  	dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, -		    evsel ? event_name(evsel) : "FAIL", +		    evsel ? perf_evsel__name(evsel) : "FAIL",  		    event->read.value);  	return 0;  } +/* For pipe mode, sample_type is not currently set */  static int perf_report__setup_sample_type(struct perf_report *rep)  {  	struct perf_session *self = rep->session; -	if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { +	if (!self->fd_pipe && !(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {  		if (sort__has_parent) {  			ui__error("Selected --sort parent, but no "  				    "callchain data. Did you call " @@ -272,7 +273,8 @@ static int perf_report__setup_sample_type(struct perf_report *rep)  	}  	if (sort__branch_mode == 1) { -		if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { +		if (!self->fd_pipe && +		    !(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {  			ui__error("Selected -b but no branch data. 
"  				  "Did you call perf record without -b?\n");  			return -1; @@ -314,7 +316,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,  	list_for_each_entry(pos, &evlist->entries, node) {  		struct hists *hists = &pos->hists; -		const char *evname = event_name(pos); +		const char *evname = perf_evsel__name(pos);  		hists__fprintf_nr_sample_events(hists, evname, stdout);  		hists__fprintf(hists, NULL, false, true, 0, 0, stdout); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b125e07eb39..7a9ad2b1ee7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -43,6 +43,11 @@ static u64			sleep_measurement_overhead;  static unsigned long		nr_tasks; +struct perf_sched { +	struct perf_tool    tool; +	struct perf_session *session; +}; +  struct sched_atom;  struct task_desc { @@ -1597,11 +1602,13 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,  						 struct perf_evsel *evsel,  						 struct machine *machine)  { +	struct perf_sched *sched = container_of(tool, struct perf_sched, tool); +	struct pevent *pevent = sched->session->pevent;  	struct thread *thread = machine__findnew_thread(machine, sample->pid);  	if (thread == NULL) {  		pr_debug("problem processing %s event, skipping it.\n", -			 evsel->name); +			 perf_evsel__name(evsel));  		return -1;  	} @@ -1612,7 +1619,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,  		tracepoint_handler f = evsel->handler.func;  		if (evsel->handler.data == NULL) -			evsel->handler.data = trace_find_event(evsel->attr.config); +			evsel->handler.data = pevent_find_event(pevent, +							  evsel->attr.config);  		f(tool, evsel->handler.data, sample, machine, thread);  	} @@ -1620,12 +1628,14 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,  	return 0;  } -static struct perf_tool perf_sched = { -	.sample			= perf_sched__process_tracepoint_sample, -	.comm			= perf_event__process_comm, -	.lost			= perf_event__process_lost, -	.fork			= perf_event__process_task, -	.ordered_samples	= true, +static struct perf_sched perf_sched = { +	.tool = { +		.sample		 = perf_sched__process_tracepoint_sample, +		.comm		 = perf_event__process_comm, +		.lost		 = perf_event__process_lost, +		.fork		 = perf_event__process_task, +		.ordered_samples = true, +	},  };  static void read_events(bool destroy, struct perf_session **psession) @@ -1640,16 +1650,20 @@ static void read_events(bool destroy, struct perf_session **psession)  		{ "sched:sched_process_exit", process_sched_exit_event, },  		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },  	}; -	struct perf_session *session = perf_session__new(input_name, O_RDONLY, -							 0, false, &perf_sched); +	struct perf_session *session; + +	session = perf_session__new(input_name, O_RDONLY, 0, false, +				    &perf_sched.tool);  	if (session == NULL)  		die("No Memory"); -	err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers); +	perf_sched.session = session; + +	err = perf_session__set_tracepoints_handlers(session, handlers);  	assert(err == 0);  	if (perf_session__has_traces(session, "record -R")) { -		err = perf_session__process_events(session, &perf_sched); +		err = perf_session__process_events(session, &perf_sched.tool);  		if (err)  			die("Failed to process events, error %d", err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8e395a538eb..1e60ab70b2b 100644 --- a/tools/perf/builtin-script.c +++ 
b/tools/perf/builtin-script.c @@ -28,6 +28,11 @@ static bool			system_wide;  static const char		*cpu_list;  static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +struct perf_script { +	struct perf_tool    tool; +	struct perf_session *session; +}; +  enum perf_output_field {  	PERF_OUTPUT_COMM            = 1U << 0,  	PERF_OUTPUT_TID             = 1U << 1, @@ -137,10 +142,11 @@ static const char *output_field2str(enum perf_output_field field)  #define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x) -static int perf_event_attr__check_stype(struct perf_event_attr *attr, -				  u64 sample_type, const char *sample_msg, -				  enum perf_output_field field) +static int perf_evsel__check_stype(struct perf_evsel *evsel, +				   u64 sample_type, const char *sample_msg, +				   enum perf_output_field field)  { +	struct perf_event_attr *attr = &evsel->attr;  	int type = attr->type;  	const char *evname; @@ -148,7 +154,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,  		return 0;  	if (output[type].user_set) { -		evname = __event_name(attr->type, attr->config); +		evname = perf_evsel__name(evsel);  		pr_err("Samples for '%s' event do not have %s attribute set. "  		       "Cannot print '%s' field.\n",  		       evname, sample_msg, output_field2str(field)); @@ -157,7 +163,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,  	/* user did not ask for it explicitly so remove from the default list */  	output[type].fields &= ~field; -	evname = __event_name(attr->type, attr->config); +	evname = perf_evsel__name(evsel);  	pr_debug("Samples for '%s' event do not have %s attribute set. "  		 "Skipping '%s' field.\n",  		 evname, sample_msg, output_field2str(field)); @@ -175,8 +181,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  		return -EINVAL;  	if (PRINT_FIELD(IP)) { -		if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", -					   PERF_OUTPUT_IP)) +		if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", +					    PERF_OUTPUT_IP))  			return -EINVAL;  		if (!no_callchain && @@ -185,8 +191,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  	}  	if (PRINT_FIELD(ADDR) && -		perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR", -				       PERF_OUTPUT_ADDR)) +		perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", +					PERF_OUTPUT_ADDR))  		return -EINVAL;  	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { @@ -208,18 +214,18 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  	}  	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && -		perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID", -				       PERF_OUTPUT_TID|PERF_OUTPUT_PID)) +		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", +					PERF_OUTPUT_TID|PERF_OUTPUT_PID))  		return -EINVAL;  	if (PRINT_FIELD(TIME) && -		perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME", -				       PERF_OUTPUT_TIME)) +		perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", +					PERF_OUTPUT_TIME))  		return -EINVAL;  	if (PRINT_FIELD(CPU) && -		perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU", -				       PERF_OUTPUT_CPU)) +		perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU", +					PERF_OUTPUT_CPU))  		return -EINVAL;  	return 0; @@ -256,11 +262,13 @@ static int perf_session__check_output_opt(struct perf_session *session)  	return 0;  } -static void print_sample_start(struct perf_sample *sample, +static void print_sample_start(struct pevent *pevent, +			       struct perf_sample *sample,  			       struct 
thread *thread, -			       struct perf_event_attr *attr) +			       struct perf_evsel *evsel)  {  	int type; +	struct perf_event_attr *attr = &evsel->attr;  	struct event_format *event;  	const char *evname = NULL;  	unsigned long secs; @@ -300,12 +308,18 @@ static void print_sample_start(struct perf_sample *sample,  	if (PRINT_FIELD(EVNAME)) {  		if (attr->type == PERF_TYPE_TRACEPOINT) { -			type = trace_parse_common_type(sample->raw_data); -			event = trace_find_event(type); +			/* +			 * XXX Do we really need this here? +			 * perf_evlist__set_tracepoint_names should have done +			 * this already +			 */ +			type = trace_parse_common_type(pevent, +						       sample->raw_data); +			event = pevent_find_event(pevent, type);  			if (event)  				evname = event->name;  		} else -			evname = __event_name(attr->type, attr->config); +			evname = perf_evsel__name(evsel);  		printf("%s: ", evname ? evname : "[unknown]");  	} @@ -387,7 +401,7 @@ static void print_sample_bts(union perf_event *event,  			printf(" ");  		else  			printf("\n"); -		perf_event__print_ip(event, sample, machine, evsel, +		perf_event__print_ip(event, sample, machine,  				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),  				     PRINT_FIELD(SYMOFFSET));  	} @@ -402,6 +416,7 @@ static void print_sample_bts(union perf_event *event,  }  static void process_event(union perf_event *event __unused, +			  struct pevent *pevent,  			  struct perf_sample *sample,  			  struct perf_evsel *evsel,  			  struct machine *machine, @@ -412,7 +427,7 @@ static void process_event(union perf_event *event __unused,  	if (output[attr->type].fields == 0)  		return; -	print_sample_start(sample, thread, attr); +	print_sample_start(pevent, sample, thread, evsel);  	if (is_bts_event(attr)) {  		print_sample_bts(event, sample, evsel, machine, thread); @@ -420,7 +435,7 @@ static void process_event(union perf_event *event __unused,  	}  	if (PRINT_FIELD(TRACE)) -		print_trace_event(sample->cpu, sample->raw_data, +		print_trace_event(pevent, sample->cpu, sample->raw_data,  				  sample->raw_size);  	if (PRINT_FIELD(ADDR)) @@ -431,7 +446,7 @@ static void process_event(union perf_event *event __unused,  			printf(" ");  		else  			printf("\n"); -		perf_event__print_ip(event, sample, machine, evsel, +		perf_event__print_ip(event, sample, machine,  				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),  				     PRINT_FIELD(SYMOFFSET));  	} @@ -451,7 +466,8 @@ static int default_stop_script(void)  	return 0;  } -static int default_generate_script(const char *outfile __unused) +static int default_generate_script(struct pevent *pevent __unused, +				   const char *outfile __unused)  {  	return 0;  } @@ -489,6 +505,7 @@ static int process_sample_event(struct perf_tool *tool __used,  				struct machine *machine)  {  	struct addr_location al; +	struct perf_script *scr = container_of(tool, struct perf_script, tool);  	struct thread *thread = machine__findnew_thread(machine, event->ip.tid);  	if (thread == NULL) { @@ -520,24 +537,27 @@ static int process_sample_event(struct perf_tool *tool __used,  	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))  		return 0; -	scripting_ops->process_event(event, sample, evsel, machine, thread); +	scripting_ops->process_event(event, scr->session->pevent, +				     sample, evsel, machine, thread);  	evsel->hists.stats.total_period += sample->period;  	return 0;  } -static struct perf_tool perf_script = { -	.sample		 = process_sample_event, -	.mmap		 = perf_event__process_mmap, -	.comm		 = perf_event__process_comm, -	.exit		 = 
perf_event__process_task, -	.fork		 = perf_event__process_task, -	.attr		 = perf_event__process_attr, -	.event_type	 = perf_event__process_event_type, -	.tracing_data	 = perf_event__process_tracing_data, -	.build_id	 = perf_event__process_build_id, -	.ordered_samples = true, -	.ordering_requires_timestamps = true, +static struct perf_script perf_script = { +	.tool = { +		.sample		 = process_sample_event, +		.mmap		 = perf_event__process_mmap, +		.comm		 = perf_event__process_comm, +		.exit		 = perf_event__process_task, +		.fork		 = perf_event__process_task, +		.attr		 = perf_event__process_attr, +		.event_type	 = perf_event__process_event_type, +		.tracing_data	 = perf_event__process_tracing_data, +		.build_id	 = perf_event__process_build_id, +		.ordered_samples = true, +		.ordering_requires_timestamps = true, +	},  };  extern volatile int session_done; @@ -553,7 +573,7 @@ static int __cmd_script(struct perf_session *session)  	signal(SIGINT, sig_handler); -	ret = perf_session__process_events(session, &perf_script); +	ret = perf_session__process_events(session, &perf_script.tool);  	if (debug_mode)  		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1335,10 +1355,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)  	if (!script_name)  		setup_pager(); -	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script); +	session = perf_session__new(input_name, O_RDONLY, 0, false, +				    &perf_script.tool);  	if (session == NULL)  		return -ENOMEM; +	perf_script.session = session; +  	if (cpu_list) {  		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))  			return -1; @@ -1384,7 +1407,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)  			return -1;  		} -		err = scripting_ops->generate_script("perf-script"); +		err = scripting_ops->generate_script(session->pevent, +						     "perf-script");  		goto out;  	} diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 07b5c7703dd..861f0aec77a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -391,7 +391,7 @@ static int read_counter_aggr(struct perf_evsel *counter)  	if (verbose) {  		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", -			event_name(counter), count[0], count[1], count[2]); +			perf_evsel__name(counter), count[0], count[1], count[2]);  	}  	/* @@ -496,7 +496,7 @@ static int run_perf_stat(int argc __used, const char **argv)  			    errno == ENXIO) {  				if (verbose)  					ui__warning("%s event is not supported by the kernel.\n", -						    event_name(counter)); +						    perf_evsel__name(counter));  				counter->supported = false;  				continue;  			} @@ -594,7 +594,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)  			csv_output ? 0 : -4,  			evsel_list->cpus->map[cpu], csv_sep); -	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel)); +	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));  	if (evsel->cgrp)  		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -792,7 +792,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)  	else  		cpu = 0; -	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel)); +	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));  	if (evsel->cgrp)  		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -908,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter)  			counter->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,  			csv_sep,  			csv_output ? 0 : -24, -			event_name(counter)); +			perf_evsel__name(counter));  		if (counter->cgrp)  			fprintf(output, "%s%s", csv_sep, counter->cgrp->name); @@ -961,7 +961,7 @@ static void print_counter(struct perf_evsel *counter)  				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,  				csv_sep,  				csv_output ? 0 : -24, -				event_name(counter)); +				perf_evsel__name(counter));  			if (counter->cgrp)  				fprintf(output, "%s%s", diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 5a8727c0875..5ce30305462 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -583,7 +583,7 @@ static int test__basic_mmap(void)  		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {  			pr_debug("expected %d %s events, got %d\n",  				 expected_nr_events[evsel->idx], -				 event_name(evsel), nr_events[evsel->idx]); +				 perf_evsel__name(evsel), nr_events[evsel->idx]);  			goto out_munmap;  		}  	} diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6bb0277b7df..e3cab5f088f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -245,7 +245,7 @@ static void perf_top__show_details(struct perf_top *top)  	if (notes->src == NULL)  		goto out_unlock; -	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name); +	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);  	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);  	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx, @@ -408,7 +408,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)  	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);  	if (top->evlist->nr_entries > 1) -		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel)); +		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));  	fprintf(stdout, "\t[f]     profile display filter (count).    
\t(%d)\n", top->count_filter); @@ -503,13 +503,13 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)  				fprintf(stderr, "\nAvailable events:");  				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) -					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel)); +					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));  				prompt_integer(&counter, "Enter details event counter");  				if (counter >= top->evlist->nr_entries) {  					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); -					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel)); +					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));  					sleep(1);  					break;  				} @@ -774,7 +774,7 @@ static void perf_event__process_sample(struct perf_tool *tool,  		if ((sort__has_parent || symbol_conf.use_callchain) &&  		    sample->callchain) { -			err = machine__resolve_callchain(machine, evsel, al.thread, +			err = machine__resolve_callchain(machine, al.thread,  							 sample->callchain, &parent);  			if (err)  				return; @@ -960,7 +960,7 @@ try_again:  			if (err == ENOENT) {  				ui__error("The %s event is not supported.\n", -					    event_name(counter)); +					  perf_evsel__name(counter));  				goto out_err;  			} else if (err == EMFILE) {  				ui__error("Too many events are opened.\n" diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index d9084e03ce5..6c18785a641 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -78,6 +78,19 @@ int main(int argc, char *argv[])          return 0;  }  endef + +define SOURCE_GTK2_INFOBAR +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#include <gtk/gtk.h> +#pragma GCC diagnostic error \"-Wstrict-prototypes\" + +int main(void) +{ +	gtk_info_bar_new(); + +	return 0; +} +endef  endif  ifndef NO_LIBPERL diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 34b1c46eaf4..67a2703e666 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -814,7 +814,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,  {  	struct disasm_line *pos, *n;  	struct annotation *notes; -	const size_t size = symbol__size(sym); +	size_t size;  	struct map_symbol ms = {  		.map = map,  		.sym = sym, @@ -834,6 +834,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,  	if (sym == NULL)  		return -1; +	size = symbol__size(sym); +  	if (map->dso->annotate_warned)  		return -1; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 53f6697d014..482f0517b61 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -23,6 +23,7 @@ struct hist_browser {  	struct hists	    *hists;  	struct hist_entry   *he_selection;  	struct map_symbol   *selection; +	int		     print_seq;  	bool		     has_symbols;  }; @@ -800,6 +801,196 @@ do_offset:  	}  } +static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *browser, +							struct callchain_node *chain_node, +							u64 total, int level, +							FILE *fp) +{ +	struct rb_node *node; +	int offset = level * LEVEL_OFFSET_STEP; +	u64 new_total, remaining; +	int printed = 0; + +	if (callchain_param.mode == CHAIN_GRAPH_REL) +		new_total = chain_node->children_hit; +	else +		new_total = total; + +	remaining = new_total; +	node = 
rb_first(&chain_node->rb_root); +	while (node) { +		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); +		struct rb_node *next = rb_next(node); +		u64 cumul = callchain_cumul_hits(child); +		struct callchain_list *chain; +		char folded_sign = ' '; +		int first = true; +		int extra_offset = 0; + +		remaining -= cumul; + +		list_for_each_entry(chain, &child->val, list) { +			char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; +			const char *str; +			bool was_first = first; + +			if (first) +				first = false; +			else +				extra_offset = LEVEL_OFFSET_STEP; + +			folded_sign = callchain_list__folded(chain); + +			alloc_str = NULL; +			str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); +			if (was_first) { +				double percent = cumul * 100.0 / new_total; + +				if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) +					str = "Not enough memory!"; +				else +					str = alloc_str; +			} + +			printed += fprintf(fp, "%*s%c %s\n", offset + extra_offset, " ", folded_sign, str); +			free(alloc_str); +			if (folded_sign == '+') +				break; +		} + +		if (folded_sign == '-') { +			const int new_level = level + (extra_offset ? 2 : 1); +			printed += hist_browser__fprintf_callchain_node_rb_tree(browser, child, new_total, +										new_level, fp); +		} + +		node = next; +	} + +	return printed; +} + +static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, +						struct callchain_node *node, +						int level, FILE *fp) +{ +	struct callchain_list *chain; +	int offset = level * LEVEL_OFFSET_STEP; +	char folded_sign = ' '; +	int printed = 0; + +	list_for_each_entry(chain, &node->val, list) { +		char ipstr[BITS_PER_LONG / 4 + 1], *s; + +		folded_sign = callchain_list__folded(chain); +		s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); +		printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); +	} + +	if (folded_sign == '-') +		printed += hist_browser__fprintf_callchain_node_rb_tree(browser, node, +									browser->hists->stats.total_period, +									level + 1,  fp); +	return printed; +} + +static int hist_browser__fprintf_callchain(struct hist_browser *browser, +					   struct rb_root *chain, int level, FILE *fp) +{ +	struct rb_node *nd; +	int printed = 0; + +	for (nd = rb_first(chain); nd; nd = rb_next(nd)) { +		struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + +		printed += hist_browser__fprintf_callchain_node(browser, node, level, fp); +	} + +	return printed; +} + +static int hist_browser__fprintf_entry(struct hist_browser *browser, +				       struct hist_entry *he, FILE *fp) +{ +	char s[8192]; +	double percent; +	int printed = 0; +	char folded_sign = ' '; + +	if (symbol_conf.use_callchain) +		folded_sign = hist_entry__folded(he); + +	hist_entry__snprintf(he, s, sizeof(s), browser->hists); +	percent = (he->period * 100.0) / browser->hists->stats.total_period; + +	if (symbol_conf.use_callchain) +		printed += fprintf(fp, "%c ", folded_sign); + +	printed += fprintf(fp, " %5.2f%%", percent); + +	if (symbol_conf.show_nr_samples) +		printed += fprintf(fp, " %11u", he->nr_events); + +	if (symbol_conf.show_total_period) +		printed += fprintf(fp, " %12" PRIu64, he->period); + +	printed += fprintf(fp, "%s\n", rtrim(s)); + +	if (folded_sign == '-') +		printed += hist_browser__fprintf_callchain(browser, &he->sorted_chain, 1, fp); + +	return printed; +} + +static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) +{ +	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries)); +	
int printed = 0; + +	while (nd) { +		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + +		printed += hist_browser__fprintf_entry(browser, h, fp); +		nd = hists__filter_entries(rb_next(nd)); +	} + +	return printed; +} + +static int hist_browser__dump(struct hist_browser *browser) +{ +	char filename[64]; +	FILE *fp; + +	while (1) { +		scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq); +		if (access(filename, F_OK)) +			break; +		/* + 		 * XXX: Just an arbitrary lazy upper limit + 		 */ +		if (++browser->print_seq == 8192) { +			ui_helpline__fpush("Too many perf.hist.N files, nothing written!"); +			return -1; +		} +	} + +	fp = fopen(filename, "w"); +	if (fp == NULL) { +		char bf[64]; +		strerror_r(errno, bf, sizeof(bf)); +		ui_helpline__fpush("Couldn't write to %s: %s", filename, bf); +		return -1; +	} + +	++browser->print_seq; +	hist_browser__fprintf(browser, fp); +	fclose(fp); +	ui_helpline__fpush("%s written!", filename); + +	return 0; +} +  static struct hist_browser *hist_browser__new(struct hists *hists)  {  	struct hist_browser *browser = zalloc(sizeof(*browser)); @@ -937,6 +1128,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,  			    browser->selection->map->dso->annotate_warned)  				continue;  			goto do_annotate; +		case 'P': +			hist_browser__dump(browser); +			continue;  		case 'd':  			goto zoom_dso;  		case 't': @@ -969,6 +1163,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,  					"E             Expand all callchains\n"  					"d             Zoom into current DSO\n"  					"t             Zoom into current Thread\n" +					"P             Print histograms to perf.hist.N\n"  					"/             Filter symbol by name");  			continue;  		case K_ENTER: @@ -1172,7 +1367,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,  	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);  	bool current_entry = ui_browser__is_current_entry(browser, row);  	unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE]; -	const char *ev_name = event_name(evsel); +	const char *ev_name = perf_evsel__name(evsel);  	char bf[256], unit;  	const char *warn = " ";  	size_t printed; @@ -1240,7 +1435,7 @@ browse_hists:  			 */  			if (timer)  				timer(arg); -			ev_name = event_name(pos); +			ev_name = perf_evsel__name(pos);  			key = perf_evsel__hists_browse(pos, nr_events, help,  						       ev_name, true, timer,  						       arg, delay_secs); @@ -1309,17 +1504,11 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,  	ui_helpline__push("Press ESC to exit");  	list_for_each_entry(pos, &evlist->entries, node) { -		const char *ev_name = event_name(pos); +		const char *ev_name = perf_evsel__name(pos);  		size_t line_len = strlen(ev_name) + 7;  		if (menu.b.width < line_len)  			menu.b.width = line_len; -		/* -		 * Cache the evsel name, tracepoints have a _high_ cost per -		 * event_name() call. 
-		 */ -		if (pos->name == NULL) -			pos->name = strdup(ev_name);  	}  	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer, @@ -1330,11 +1519,10 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,  				  void(*timer)(void *arg), void *arg,  				  int delay_secs)  { -  	if (evlist->nr_entries == 1) {  		struct perf_evsel *first = list_entry(evlist->entries.next,  						      struct perf_evsel, node); -		const char *ev_name = event_name(first); +		const char *ev_name = perf_evsel__name(first);  		return perf_evsel__hists_browse(first, evlist->nr_entries, help,  						ev_name, false, timer, arg,  						delay_secs); diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 0656c381a89..ec12e0b4ded 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -11,8 +11,8 @@  static void perf_gtk__signal(int sig)  { +	perf_gtk__exit(false);  	psignal(sig, "perf"); -	gtk_main_quit();  }  static void perf_gtk__resize_window(GtkWidget *window) @@ -122,13 +122,59 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)  	gtk_container_add(GTK_CONTAINER(window), view);  } +#ifdef HAVE_GTK_INFO_BAR +static GtkWidget *perf_gtk__setup_info_bar(void) +{ +	GtkWidget *info_bar; +	GtkWidget *label; +	GtkWidget *content_area; + +	info_bar = gtk_info_bar_new(); +	gtk_widget_set_no_show_all(info_bar, TRUE); + +	label = gtk_label_new(""); +	gtk_widget_show(label); + +	content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar)); +	gtk_container_add(GTK_CONTAINER(content_area), label); + +	gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK, +				GTK_RESPONSE_OK); +	g_signal_connect(info_bar, "response", +			 G_CALLBACK(gtk_widget_hide), NULL); + +	pgctx->info_bar = info_bar; +	pgctx->message_label = label; + +	return info_bar; +} +#endif + +static GtkWidget *perf_gtk__setup_statusbar(void) +{ +	GtkWidget *stbar; +	unsigned ctxid; + +	stbar = gtk_statusbar_new(); + +	ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar), +					     "perf report"); +	pgctx->statbar = stbar; +	pgctx->statbar_ctx_id = ctxid; + +	return stbar; +} +  int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,  				  const char *help __used,  				  void (*timer) (void *arg)__used,  				  void *arg __used, int delay_secs __used)  {  	struct perf_evsel *pos; +	GtkWidget *vbox;  	GtkWidget *notebook; +	GtkWidget *info_bar; +	GtkWidget *statbar;  	GtkWidget *window;  	signal(SIGSEGV, perf_gtk__signal); @@ -143,11 +189,17 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,  	g_signal_connect(window, "delete_event", gtk_main_quit, NULL); +	pgctx = perf_gtk__activate_context(window); +	if (!pgctx) +		return -1; + +	vbox = gtk_vbox_new(FALSE, 0); +  	notebook = gtk_notebook_new();  	list_for_each_entry(pos, &evlist->entries, node) {  		struct hists *hists = &pos->hists; -		const char *evname = event_name(pos); +		const char *evname = perf_evsel__name(pos);  		GtkWidget *scrolled_window;  		GtkWidget *tab_label; @@ -164,7 +216,16 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,  		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);  	} -	gtk_container_add(GTK_CONTAINER(window), notebook); +	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + +	info_bar = perf_gtk__setup_info_bar(); +	if (info_bar) +		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0); + +	statbar = perf_gtk__setup_statusbar(); +	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, 
FALSE, 0); + +	gtk_container_add(GTK_CONTAINER(window), vbox);  	gtk_widget_show_all(window); @@ -174,5 +235,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,  	gtk_main(); +	perf_gtk__deactivate_context(&pgctx); +  	return 0;  } diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 75177ee0403..a4d0f2b4a2d 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -1,8 +1,39 @@  #ifndef _PERF_GTK_H_  #define _PERF_GTK_H_ 1 +#include <stdbool.h> +  #pragma GCC diagnostic ignored "-Wstrict-prototypes"  #include <gtk/gtk.h>  #pragma GCC diagnostic error "-Wstrict-prototypes" + +struct perf_gtk_context { +	GtkWidget *main_window; + +#ifdef HAVE_GTK_INFO_BAR +	GtkWidget *info_bar; +	GtkWidget *message_label; +#endif +	GtkWidget *statbar; +	guint statbar_ctx_id; +}; + +extern struct perf_gtk_context *pgctx; + +static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) +{ +	return ctx && ctx->main_window; +} + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); + +#ifndef HAVE_GTK_INFO_BAR +static inline GtkWidget *perf_gtk__setup_info_bar(void) +{ +	return NULL; +} +#endif +  #endif /* _PERF_GTK_H_ */ diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 82952995776..92879ce61e2 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -1,12 +1,17 @@  #include "gtk.h"  #include "../../util/cache.h" +#include "../../util/debug.h" + +extern struct perf_error_ops perf_gtk_eops;  int perf_gtk__init(void)  { +	perf_error__register(&perf_gtk_eops);  	return gtk_init_check(NULL, NULL) ? 0 : -1;  }  void perf_gtk__exit(bool wait_for_ok __used)  { +	perf_error__unregister(&perf_gtk_eops);  	gtk_main_quit();  } diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c new file mode 100644 index 00000000000..0ead373c0df --- /dev/null +++ b/tools/perf/ui/gtk/util.c @@ -0,0 +1,129 @@ +#include "../util.h" +#include "../../util/debug.h" +#include "gtk.h" + +#include <string.h> + + +struct perf_gtk_context *pgctx; + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window) +{ +	struct perf_gtk_context *ctx; + +	ctx = malloc(sizeof(*pgctx)); +	if (ctx) +		ctx->main_window = window; + +	return ctx; +} + +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx) +{ +	if (!perf_gtk__is_active_context(*ctx)) +		return -1; + +	free(*ctx); +	*ctx = NULL; +	return 0; +} + +static int perf_gtk__error(const char *format, va_list args) +{ +	char *msg; +	GtkWidget *dialog; + +	if (!perf_gtk__is_active_context(pgctx) || +	    vasprintf(&msg, format, args) < 0) { +		fprintf(stderr, "Error:\n"); +		vfprintf(stderr, format, args); +		fprintf(stderr, "\n"); +		return -1; +	} + +	dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window), +					GTK_DIALOG_DESTROY_WITH_PARENT, +					GTK_MESSAGE_ERROR, +					GTK_BUTTONS_CLOSE, +					"<b>Error</b>\n\n%s", msg); +	gtk_dialog_run(GTK_DIALOG(dialog)); + +	gtk_widget_destroy(dialog); +	free(msg); +	return 0; +} + +#ifdef HAVE_GTK_INFO_BAR +static int perf_gtk__warning_info_bar(const char *format, va_list args) +{ +	char *msg; + +	if (!perf_gtk__is_active_context(pgctx) || +	    vasprintf(&msg, format, args) < 0) { +		fprintf(stderr, "Warning:\n"); +		vfprintf(stderr, format, args); +		fprintf(stderr, "\n"); +		return -1; +	} + +	gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg); +	gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar), +				      
GTK_MESSAGE_WARNING); +	gtk_widget_show(pgctx->info_bar); + +	free(msg); +	return 0; +} +#else +static int perf_gtk__warning_statusbar(const char *format, va_list args) +{ +	char *msg, *p; + +	if (!perf_gtk__is_active_context(pgctx) || +	    vasprintf(&msg, format, args) < 0) { +		fprintf(stderr, "Warning:\n"); +		vfprintf(stderr, format, args); +		fprintf(stderr, "\n"); +		return -1; +	} + +	gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), +			  pgctx->statbar_ctx_id); + +	/* Only first line can be displayed */ +	p = strchr(msg, '\n'); +	if (p) +		*p = '\0'; + +	gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), +			   pgctx->statbar_ctx_id, msg); + +	free(msg); +	return 0; +} +#endif + +struct perf_error_ops perf_gtk_eops = { +	.error		= perf_gtk__error, +#ifdef HAVE_GTK_INFO_BAR +	.warning	= perf_gtk__warning_info_bar, +#else +	.warning	= perf_gtk__warning_statusbar, +#endif +}; + +/* + * FIXME: Functions below should be implemented properly. + *        For now, just add stubs for NO_NEWT=1 build. + */ +#ifdef NO_NEWT_SUPPORT +int ui_helpline__show_help(const char *format __used, va_list ap __used) +{ +	return 0; +} + +void ui_progress__update(u64 curr __used, u64 total __used, +			 const char *title __used) +{ +} +#endif diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index d33e943ac43..e813c1d1734 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -15,6 +15,8 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;  static volatile int ui__need_resize; +extern struct perf_error_ops perf_tui_eops; +  void ui__refresh_dimensions(bool force)  {  	if (force || ui__need_resize) { @@ -122,6 +124,8 @@ int ui__init(void)  	signal(SIGINT, ui__signal);  	signal(SIGQUIT, ui__signal);  	signal(SIGTERM, ui__signal); + +	perf_error__register(&perf_tui_eops);  out:  	return err;  } @@ -137,4 +141,6 @@ void ui__exit(bool wait_for_ok)  	SLsmg_refresh();  	SLsmg_reset_smg();  	SLang_reset_tty(); + +	perf_error__unregister(&perf_tui_eops);  } diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c new file mode 100644 index 00000000000..092902e30ce --- /dev/null +++ b/tools/perf/ui/tui/util.c @@ -0,0 +1,243 @@ +#include "../../util/util.h" +#include <signal.h> +#include <stdbool.h> +#include <string.h> +#include <sys/ttydefaults.h> + +#include "../../util/cache.h" +#include "../../util/debug.h" +#include "../browser.h" +#include "../keysyms.h" +#include "../helpline.h" +#include "../ui.h" +#include "../util.h" +#include "../libslang.h" + +static void ui_browser__argv_write(struct ui_browser *browser, +				   void *entry, int row) +{ +	char **arg = entry; +	bool current_entry = ui_browser__is_current_entry(browser, row); + +	ui_browser__set_color(browser, current_entry ? 
HE_COLORSET_SELECTED : +						       HE_COLORSET_NORMAL); +	slsmg_write_nstring(*arg, browser->width); +} + +static int popup_menu__run(struct ui_browser *menu) +{ +	int key; + +	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0) +		return -1; + +	while (1) { +		key = ui_browser__run(menu, 0); + +		switch (key) { +		case K_RIGHT: +		case K_ENTER: +			key = menu->index; +			break; +		case K_LEFT: +		case K_ESC: +		case 'q': +		case CTRL('c'): +			key = -1; +			break; +		default: +			continue; +		} + +		break; +	} + +	ui_browser__hide(menu); +	return key; +} + +int ui__popup_menu(int argc, char * const argv[]) +{ +	struct ui_browser menu = { +		.entries    = (void *)argv, +		.refresh    = ui_browser__argv_refresh, +		.seek	    = ui_browser__argv_seek, +		.write	    = ui_browser__argv_write, +		.nr_entries = argc, +	}; + +	return popup_menu__run(&menu); +} + +int ui_browser__input_window(const char *title, const char *text, char *input, +			     const char *exit_msg, int delay_secs) +{ +	int x, y, len, key; +	int max_len = 60, nr_lines = 0; +	static char buf[50]; +	const char *t; + +	t = text; +	while (1) { +		const char *sep = strchr(t, '\n'); + +		if (sep == NULL) +			sep = strchr(t, '\0'); +		len = sep - t; +		if (max_len < len) +			max_len = len; +		++nr_lines; +		if (*sep == '\0') +			break; +		t = sep + 1; +	} + +	max_len += 2; +	nr_lines += 8; +	y = SLtt_Screen_Rows / 2 - nr_lines / 2; +	x = SLtt_Screen_Cols / 2 - max_len / 2; + +	SLsmg_set_color(0); +	SLsmg_draw_box(y, x++, nr_lines, max_len); +	if (title) { +		SLsmg_gotorc(y, x + 1); +		SLsmg_write_string((char *)title); +	} +	SLsmg_gotorc(++y, x); +	nr_lines -= 7; +	max_len -= 2; +	SLsmg_write_wrapped_string((unsigned char *)text, y, x, +				   nr_lines, max_len, 1); +	y += nr_lines; +	len = 5; +	while (len--) { +		SLsmg_gotorc(y + len - 1, x); +		SLsmg_write_nstring((char *)" ", max_len); +	} +	SLsmg_draw_box(y++, x + 1, 3, max_len - 2); + +	SLsmg_gotorc(y + 3, x); +	SLsmg_write_nstring((char *)exit_msg, max_len); +	SLsmg_refresh(); + +	x += 2; +	len = 0; +	key = ui__getch(delay_secs); +	while (key != K_TIMER && key != K_ENTER && key != K_ESC) { +		if (key == K_BKSPC) { +			if (len == 0) +				goto next_key; +			SLsmg_gotorc(y, x + --len); +			SLsmg_write_char(' '); +		} else { +			buf[len] = key; +			SLsmg_gotorc(y, x + len++); +			SLsmg_write_char(key); +		} +		SLsmg_refresh(); + +		/* XXX more graceful overflow handling needed */ +		if (len == sizeof(buf) - 1) { +			ui_helpline__push("maximum size of symbol name reached!"); +			key = K_ENTER; +			break; +		} +next_key: +		key = ui__getch(delay_secs); +	} + +	buf[len] = '\0'; +	strncpy(input, buf, len+1); +	return key; +} + +int ui__question_window(const char *title, const char *text, +			const char *exit_msg, int delay_secs) +{ +	int x, y; +	int max_len = 0, nr_lines = 0; +	const char *t; + +	t = text; +	while (1) { +		const char *sep = strchr(t, '\n'); +		int len; + +		if (sep == NULL) +			sep = strchr(t, '\0'); +		len = sep - t; +		if (max_len < len) +			max_len = len; +		++nr_lines; +		if (*sep == '\0') +			break; +		t = sep + 1; +	} + +	max_len += 2; +	nr_lines += 4; +	y = SLtt_Screen_Rows / 2 - nr_lines / 2, +	x = SLtt_Screen_Cols / 2 - max_len / 2; + +	SLsmg_set_color(0); +	SLsmg_draw_box(y, x++, nr_lines, max_len); +	if (title) { +		SLsmg_gotorc(y, x + 1); +		SLsmg_write_string((char *)title); +	} +	SLsmg_gotorc(++y, x); +	nr_lines -= 2; +	max_len -= 2; +	SLsmg_write_wrapped_string((unsigned char *)text, y, x, +				   nr_lines, max_len, 1); +	
SLsmg_gotorc(y + nr_lines - 2, x); +	SLsmg_write_nstring((char *)" ", max_len); +	SLsmg_gotorc(y + nr_lines - 1, x); +	SLsmg_write_nstring((char *)exit_msg, max_len); +	SLsmg_refresh(); +	return ui__getch(delay_secs); +} + +int ui__help_window(const char *text) +{ +	return ui__question_window("Help", text, "Press any key...", 0); +} + +int ui__dialog_yesno(const char *msg) +{ +	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0); +} + +static int __ui__warning(const char *title, const char *format, va_list args) +{ +	char *s; + +	if (vasprintf(&s, format, args) > 0) { +		int key; + +		pthread_mutex_lock(&ui__lock); +		key = ui__question_window(title, s, "Press any key...", 0); +		pthread_mutex_unlock(&ui__lock); +		free(s); +		return key; +	} + +	fprintf(stderr, "%s\n", title); +	vfprintf(stderr, format, args); +	return K_ESC; +} + +static int perf_tui__error(const char *format, va_list args) +{ +	return __ui__warning("Error:", format, args); +} + +static int perf_tui__warning(const char *format, va_list args) +{ +	return __ui__warning("Warning:", format, args); +} + +struct perf_error_ops perf_tui_eops = { +	.error		= perf_tui__error, +	.warning	= perf_tui__warning, +}; diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c index ad4374a16bb..4f989774c8c 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -1,250 +1,85 @@ -#include "../util.h" -#include <signal.h> -#include <stdbool.h> -#include <string.h> -#include <sys/ttydefaults.h> - -#include "../cache.h" -#include "../debug.h" -#include "browser.h" -#include "keysyms.h" -#include "helpline.h" -#include "ui.h"  #include "util.h" -#include "libslang.h" - -static void ui_browser__argv_write(struct ui_browser *browser, -				   void *entry, int row) -{ -	char **arg = entry; -	bool current_entry = ui_browser__is_current_entry(browser, row); +#include "../debug.h" -	ui_browser__set_color(browser, current_entry ? 
HE_COLORSET_SELECTED : -						       HE_COLORSET_NORMAL); -	slsmg_write_nstring(*arg, browser->width); -} -static int popup_menu__run(struct ui_browser *menu) +/* + * Default error logging functions + */ +static int perf_stdio__error(const char *format, va_list args)  { -	int key; - -	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0) -		return -1; - -	while (1) { -		key = ui_browser__run(menu, 0); - -		switch (key) { -		case K_RIGHT: -		case K_ENTER: -			key = menu->index; -			break; -		case K_LEFT: -		case K_ESC: -		case 'q': -		case CTRL('c'): -			key = -1; -			break; -		default: -			continue; -		} - -		break; -	} - -	ui_browser__hide(menu); -	return key; +	fprintf(stderr, "Error:\n"); +	vfprintf(stderr, format, args); +	return 0;  } -int ui__popup_menu(int argc, char * const argv[]) +static int perf_stdio__warning(const char *format, va_list args)  { -	struct ui_browser menu = { -		.entries    = (void *)argv, -		.refresh    = ui_browser__argv_refresh, -		.seek	    = ui_browser__argv_seek, -		.write	    = ui_browser__argv_write, -		.nr_entries = argc, -	}; - -	return popup_menu__run(&menu); +	fprintf(stderr, "Warning:\n"); +	vfprintf(stderr, format, args); +	return 0;  } -int ui_browser__input_window(const char *title, const char *text, char *input, -			     const char *exit_msg, int delay_secs) +static struct perf_error_ops default_eops =  { -	int x, y, len, key; -	int max_len = 60, nr_lines = 0; -	static char buf[50]; -	const char *t; - -	t = text; -	while (1) { -		const char *sep = strchr(t, '\n'); - -		if (sep == NULL) -			sep = strchr(t, '\0'); -		len = sep - t; -		if (max_len < len) -			max_len = len; -		++nr_lines; -		if (*sep == '\0') -			break; -		t = sep + 1; -	} - -	max_len += 2; -	nr_lines += 8; -	y = SLtt_Screen_Rows / 2 - nr_lines / 2; -	x = SLtt_Screen_Cols / 2 - max_len / 2; - -	SLsmg_set_color(0); -	SLsmg_draw_box(y, x++, nr_lines, max_len); -	if (title) { -		SLsmg_gotorc(y, x + 1); -		SLsmg_write_string((char *)title); -	} -	SLsmg_gotorc(++y, x); -	nr_lines -= 7; -	max_len -= 2; -	SLsmg_write_wrapped_string((unsigned char *)text, y, x, -				   nr_lines, max_len, 1); -	y += nr_lines; -	len = 5; -	while (len--) { -		SLsmg_gotorc(y + len - 1, x); -		SLsmg_write_nstring((char *)" ", max_len); -	} -	SLsmg_draw_box(y++, x + 1, 3, max_len - 2); - -	SLsmg_gotorc(y + 3, x); -	SLsmg_write_nstring((char *)exit_msg, max_len); -	SLsmg_refresh(); +	.error		= perf_stdio__error, +	.warning	= perf_stdio__warning, +}; -	x += 2; -	len = 0; -	key = ui__getch(delay_secs); -	while (key != K_TIMER && key != K_ENTER && key != K_ESC) { -		if (key == K_BKSPC) { -			if (len == 0) -				goto next_key; -			SLsmg_gotorc(y, x + --len); -			SLsmg_write_char(' '); -		} else { -			buf[len] = key; -			SLsmg_gotorc(y, x + len++); -			SLsmg_write_char(key); -		} -		SLsmg_refresh(); +static struct perf_error_ops *perf_eops = &default_eops; -		/* XXX more graceful overflow handling needed */ -		if (len == sizeof(buf) - 1) { -			ui_helpline__push("maximum size of symbol name reached!"); -			key = K_ENTER; -			break; -		} -next_key: -		key = ui__getch(delay_secs); -	} -	buf[len] = '\0'; -	strncpy(input, buf, len+1); -	return key; -} - -int ui__question_window(const char *title, const char *text, -			const char *exit_msg, int delay_secs) +int ui__error(const char *format, ...)  
{ -	int x, y; -	int max_len = 0, nr_lines = 0; -	const char *t; - -	t = text; -	while (1) { -		const char *sep = strchr(t, '\n'); -		int len; - -		if (sep == NULL) -			sep = strchr(t, '\0'); -		len = sep - t; -		if (max_len < len) -			max_len = len; -		++nr_lines; -		if (*sep == '\0') -			break; -		t = sep + 1; -	} - -	max_len += 2; -	nr_lines += 4; -	y = SLtt_Screen_Rows / 2 - nr_lines / 2, -	x = SLtt_Screen_Cols / 2 - max_len / 2; +	int ret; +	va_list args; -	SLsmg_set_color(0); -	SLsmg_draw_box(y, x++, nr_lines, max_len); -	if (title) { -		SLsmg_gotorc(y, x + 1); -		SLsmg_write_string((char *)title); -	} -	SLsmg_gotorc(++y, x); -	nr_lines -= 2; -	max_len -= 2; -	SLsmg_write_wrapped_string((unsigned char *)text, y, x, -				   nr_lines, max_len, 1); -	SLsmg_gotorc(y + nr_lines - 2, x); -	SLsmg_write_nstring((char *)" ", max_len); -	SLsmg_gotorc(y + nr_lines - 1, x); -	SLsmg_write_nstring((char *)exit_msg, max_len); -	SLsmg_refresh(); -	return ui__getch(delay_secs); -} +	va_start(args, format); +	ret = perf_eops->error(format, args); +	va_end(args); -int ui__help_window(const char *text) -{ -	return ui__question_window("Help", text, "Press any key...", 0); +	return ret;  } -int ui__dialog_yesno(const char *msg) -{ -	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0); -} - -int __ui__warning(const char *title, const char *format, va_list args) +int ui__warning(const char *format, ...)  { -	char *s; - -	if (use_browser > 0 && vasprintf(&s, format, args) > 0) { -		int key; +	int ret; +	va_list args; -		pthread_mutex_lock(&ui__lock); -		key = ui__question_window(title, s, "Press any key...", 0); -		pthread_mutex_unlock(&ui__lock); -		free(s); -		return key; -	} +	va_start(args, format); +	ret = perf_eops->warning(format, args); +	va_end(args); -	fprintf(stderr, "%s:\n", title); -	vfprintf(stderr, format, args); -	return K_ESC; +	return ret;  } -int ui__warning(const char *format, ...) + +/** + * perf_error__register - Register error logging functions + * @eops: The pointer to error logging function struct + * + * Register UI-specific error logging functions. Before calling this, + * other logging functions should be unregistered, if any. + */ +int perf_error__register(struct perf_error_ops *eops)  { -	int key; -	va_list args; +	if (perf_eops != &default_eops) +		return -1; -	va_start(args, format); -	key = __ui__warning("Warning", format, args); -	va_end(args); -	return key; +	perf_eops = eops; +	return 0;  } -int ui__error(const char *format, ...) +/** + * perf_error__unregister - Unregister error logging functions + * @eops: The pointer to error logging function struct + * + * Unregister already registered error logging functions. 
+ */ +int perf_error__unregister(struct perf_error_ops *eops)  { -	int key; -	va_list args; +	if (perf_eops != eops) +		return -1; -	va_start(args, format); -	key = __ui__warning("Error", format, args); -	va_end(args); -	return key; +	perf_eops = &default_eops; +	return 0;  } diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h index 2d1738bd71c..361f08c52d3 100644 --- a/tools/perf/ui/util.h +++ b/tools/perf/ui/util.h @@ -9,6 +9,13 @@ int ui__help_window(const char *text);  int ui__dialog_yesno(const char *msg);  int ui__question_window(const char *title, const char *text,  			const char *exit_msg, int delay_secs); -int __ui__warning(const char *title, const char *format, va_list args); + +struct perf_error_ops { +	int (*error)(const char *format, va_list args); +	int (*warning)(const char *format, va_list args); +}; + +int perf_error__register(struct perf_error_ops *eops); +int perf_error__unregister(struct perf_error_ops *eops);  #endif /* _PERF_UI_UTIL_H_ */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index efb1fce259a..4dfe0bb3c32 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -47,7 +47,7 @@ int dump_printf(const char *fmt, ...)  	return ret;  } -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)  int ui__warning(const char *format, ...)  {  	va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 6bebe7f0a20..015c91dbc09 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,8 +12,9 @@ int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));  void trace_event(union perf_event *event);  struct ui_progress; +struct perf_error_ops; -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)  static inline int ui_helpline__show_help(const char *format __used, va_list ap __used)  {  	return 0; @@ -23,12 +24,28 @@ static inline void ui_progress__update(u64 curr __used, u64 total __used,  				       const char *title __used) {}  #define ui__error(format, arg...) ui__warning(format, ##arg) -#else + +static inline int +perf_error__register(struct perf_error_ops *eops __used) +{ +	return 0; +} + +static inline int +perf_error__unregister(struct perf_error_ops *eops __used) +{ +	return 0; +} + +#else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ +  extern char ui_helpline__last_msg[];  int ui_helpline__show_help(const char *format, va_list ap);  #include "../ui/progress.h"  int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -#endif +#include "../ui/util.h" + +#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */  int ui__warning(const char *format, ...) 
__attribute__((format(printf, 1, 2)));  int ui__error_paranoid(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7400fb3fc50..f74e9560350 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -224,8 +224,8 @@ out_free_attrs:  	return err;  } -static struct perf_evsel * -	perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)  {  	struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 989bee9624c..40d4d3cdced 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -73,6 +73,9 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,  #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \  	perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); +  void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,  			 int cpu, int thread, u64 id); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9f6cebd798e..e8177136486 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -15,7 +15,7 @@  #include "cpumap.h"  #include "thread_map.h"  #include "target.h" -#include "../../include/linux/perf_event.h" +#include "../../../include/linux/hw_breakpoint.h"  #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))  #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) @@ -78,7 +78,7 @@ static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {  	"ref-cycles",  }; -const char *__perf_evsel__hw_name(u64 config) +static const char *__perf_evsel__hw_name(u64 config)  {  	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])  		return perf_evsel__hw_names[config]; @@ -86,16 +86,15 @@ const char *__perf_evsel__hw_name(u64 config)  	return "unknown-hardware";  } -static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) +static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)  { -	int colon = 0; +	int colon = 0, r = 0;  	struct perf_event_attr *attr = &evsel->attr; -	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(attr->config));  	bool exclude_guest_default = false;  #define MOD_PRINT(context, mod)	do {					\  		if (!attr->exclude_##context) {				\ -			if (!colon) colon = r++;			\ +			if (!colon) colon = ++r;			\  			r += scnprintf(bf + r, size - r, "%c", mod);	\  		} } while(0) @@ -108,7 +107,7 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)  	if (attr->precise_ip) {  		if (!colon) -			colon = r++; +			colon = ++r;  		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");  		exclude_guest_default = true;  	} @@ -119,39 +118,211 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)  	}  #undef MOD_PRINT  	if (colon) -		bf[colon] = ':'; +		bf[colon - 1] = ':'; +	return r; +} + +static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ +	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config)); +	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { +	"cpu-clock", +	"task-clock", +	"page-faults", +	"context-switches", +	"CPU-migrations", +	"minor-faults", +	"major-faults", +	"alignment-faults", +	"emulation-faults", +}; + 
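The perf_evsel__sw_names[] table just above, together with the __perf_evsel__sw_name() helper that follows, turns a PERF_TYPE_SOFTWARE config value into a printable name via a bounds-checked array index, with NULL slots and out-of-range values falling back to a placeholder string. A minimal standalone sketch of that lookup, with hypothetical names and the bound inlined so it compiles outside the perf tree:

	#include <stdio.h>

	/* Mirrors the perf_evsel__sw_names[] table above. */
	static const char *sw_names[] = {
		"cpu-clock", "task-clock", "page-faults", "context-switches",
		"CPU-migrations", "minor-faults", "major-faults",
		"alignment-faults", "emulation-faults",
	};

	/* Bounds check first, then index; unknown configs get a fallback. */
	static const char *sw_name(unsigned long config)
	{
		if (config < sizeof(sw_names) / sizeof(sw_names[0]) &&
		    sw_names[config])
			return sw_names[config];
		return "unknown-software";
	}

	int main(void)
	{
		printf("%s\n", sw_name(3));	/* context-switches */
		printf("%s\n", sw_name(99));	/* unknown-software */
		return 0;
	}

In the patch itself the index type is u64 and the bound is PERF_COUNT_SW_MAX; the sketch inlines both only to stay self-contained.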
+static const char *__perf_evsel__sw_name(u64 config) +{ +	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) +		return perf_evsel__sw_names[config]; +	return "unknown-software"; +} + +static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ +	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config)); +	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) +{ +	int r; + +	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr); + +	if (type & HW_BREAKPOINT_R) +		r += scnprintf(bf + r, size - r, "r"); + +	if (type & HW_BREAKPOINT_W) +		r += scnprintf(bf + r, size - r, "w"); + +	if (type & HW_BREAKPOINT_X) +		r += scnprintf(bf + r, size - r, "x"); +  	return r;  } -int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) +static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)  { -	int ret; +	struct perf_event_attr *attr = &evsel->attr; +	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); +	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + +const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] +				[PERF_EVSEL__MAX_ALIASES] = { + { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		}, + { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	}, + { "LLC",	"L2",							}, + { "dTLB",	"d-tlb",	"Data-TLB",				}, + { "iTLB",	"i-tlb",	"Instruction-TLB",			}, + { "branch",	"branches",	"bpu",		"btb",		"bpc",	}, + { "node",								}, +}; + +const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] +				   [PERF_EVSEL__MAX_ALIASES] = { + { "load",	"loads",	"read",					}, + { "store",	"stores",	"write",				}, + { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	}, +}; + +const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] +				       [PERF_EVSEL__MAX_ALIASES] = { + { "refs",	"Reference",	"ops",		"access",		}, + { "misses",	"miss",							}, +}; + +#define C(x)		PERF_COUNT_HW_CACHE_##x +#define CACHE_READ	(1 << C(OP_READ)) +#define CACHE_WRITE	(1 << C(OP_WRITE)) +#define CACHE_PREFETCH	(1 << C(OP_PREFETCH)) +#define COP(x)		(1 << x) + +/* + * cache operation stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { + [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH), + [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)]	= (CACHE_READ), + [C(BPU)]	= (CACHE_READ), + [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), +}; + +bool perf_evsel__is_cache_op_valid(u8 type, u8 op) +{ +	if (perf_evsel__hw_cache_stat[type] & COP(op)) +		return true;	/* valid */ +	else +		return false;	/* invalid */ +} + +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, +					    char *bf, size_t size) +{ +	if (result) { +		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], +				 perf_evsel__hw_cache_op[op][0], +				 perf_evsel__hw_cache_result[result][0]); +	} + +	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0], +			 perf_evsel__hw_cache_op[op][1]); +} + +static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +{ +	u8 op, result, type = (config >>  0) & 0xff; +	const char *err = "unknown-ext-hardware-cache-type"; + +	if (type > PERF_COUNT_HW_CACHE_MAX) +		goto out_err; + +	op = (config >>  8) & 0xff; +	err = 
"unknown-ext-hardware-cache-op"; +	if (op > PERF_COUNT_HW_CACHE_OP_MAX) +		goto out_err; + +	result = (config >> 16) & 0xff; +	err = "unknown-ext-hardware-cache-result"; +	if (result > PERF_COUNT_HW_CACHE_RESULT_MAX) +		goto out_err; + +	err = "invalid-cache"; +	if (!perf_evsel__is_cache_op_valid(type, op)) +		goto out_err; + +	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size); +out_err: +	return scnprintf(bf, size, "%s", err); +} + +static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size) +{ +	int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size); +	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + +static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ +	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config); +	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + +const char *perf_evsel__name(struct perf_evsel *evsel) +{ +	char bf[128]; + +	if (evsel->name) +		return evsel->name;  	switch (evsel->attr.type) {  	case PERF_TYPE_RAW: -		ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config); +		perf_evsel__raw_name(evsel, bf, sizeof(bf));  		break;  	case PERF_TYPE_HARDWARE: -		ret = perf_evsel__hw_name(evsel, bf, size); +		perf_evsel__hw_name(evsel, bf, sizeof(bf)); +		break; + +	case PERF_TYPE_HW_CACHE: +		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));  		break; + +	case PERF_TYPE_SOFTWARE: +		perf_evsel__sw_name(evsel, bf, sizeof(bf)); +		break; + +	case PERF_TYPE_TRACEPOINT: +		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint"); +		break; + +	case PERF_TYPE_BREAKPOINT: +		perf_evsel__bp_name(evsel, bf, sizeof(bf)); +		break; +  	default: -		/* -		 * FIXME - 		 * -		 * This is the minimal perf_evsel__name so that we can -		 * reconstruct event names taking into account event modifiers. -		 * -		 * The old event_name uses it now for raw anr hw events, so that -		 * we don't drag all the parsing stuff into the python binding. -		 * -		 * On the next devel cycle the rest of the event naming will be -		 * brought here. 
- 		 */ -		return 0; +		scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); +		break;  	} -	return ret; +	evsel->name = strdup(bf); + +	return evsel->name ?: "unknown";  }  void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4ba8b564e6f..67cc5033d19 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,8 +83,19 @@ void perf_evsel__config(struct perf_evsel *evsel,  			struct perf_record_opts *opts,  			struct perf_evsel *first); -const char* __perf_evsel__hw_name(u64 config); -int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size); +bool perf_evsel__is_cache_op_valid(u8 type, u8 op); + +#define PERF_EVSEL__MAX_ALIASES 8 + +extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] +				       [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] +					  [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] +				       [PERF_EVSEL__MAX_ALIASES]; +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, +					    char *bf, size_t size); +const char *perf_evsel__name(struct perf_evsel *evsel);  int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);  int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e909d43cf54..5a47aba4675 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -641,7 +641,7 @@ static int write_event_desc(int fd, struct perf_header *h __used,  		/*  		 * write event string as passed on cmdline  		 */ -		ret = do_write_string(fd, event_name(attr)); +		ret = do_write_string(fd, perf_evsel__name(attr));  		if (ret < 0)  			return ret;  		/* @@ -1474,15 +1474,15 @@ out:  static int process_tracing_data(struct perf_file_section *section __unused,  			      struct perf_header *ph __unused, -			      int feat __unused, int fd) +			      int feat __unused, int fd, void *data)  { -	trace_report(fd, false); +	trace_report(fd, data, false);  	return 0;  }  static int process_build_id(struct perf_file_section *section,  			    struct perf_header *ph, -			    int feat __unused, int fd) +			    int feat __unused, int fd, void *data __used)  {  	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))  		pr_debug("Failed to read buildids, continuing...\n"); @@ -1493,7 +1493,7 @@ struct feature_ops {  	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);  	void (*print)(struct perf_header *h, int fd, FILE *fp);  	int (*process)(struct perf_file_section *section, -		       struct perf_header *h, int feat, int fd); +		       struct perf_header *h, int feat, int fd, void *data);  	const char *name;  	bool full_only;  }; @@ -1988,7 +1988,7 @@ int perf_file_header__read(struct perf_file_header *header,  static int perf_file_section__process(struct perf_file_section *section,  				      struct perf_header *ph, -				      int feat, int fd, void *data __used) +				      int feat, int fd, void *data)  {  	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {  		pr_debug("Failed to lseek to %" PRIu64 " offset for feature " @@ -2004,7 +2004,7 @@ static int perf_file_section__process(struct perf_file_section *section,  	if (!feat_ops[feat].process)  		return 0; -	return feat_ops[feat].process(section, ph, feat, fd); +	return feat_ops[feat].process(section, ph, feat, fd, data);  }  static int 
perf_file_header__read_pipe(struct perf_pipe_file_header *header, @@ -2093,9 +2093,11 @@ static int read_attr(int fd, struct perf_header *ph,  	return ret <= 0 ? -1 : 0;  } -static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel) +static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, +					   struct pevent *pevent)  { -	struct event_format *event = trace_find_event(evsel->attr.config); +	struct event_format *event = pevent_find_event(pevent, +						       evsel->attr.config);  	char bf[128];  	if (event == NULL) @@ -2109,13 +2111,14 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel)  	return 0;  } -static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist) +static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, +					     struct pevent *pevent)  {  	struct perf_evsel *pos;  	list_for_each_entry(pos, &evlist->entries, node) {  		if (pos->attr.type == PERF_TYPE_TRACEPOINT && -		    perf_evsel__set_tracepoint_name(pos)) +		    perf_evsel__set_tracepoint_name(pos, pevent))  			return -1;  	} @@ -2198,12 +2201,12 @@ int perf_session__read_header(struct perf_session *session, int fd)  		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);  	} -	perf_header__process_sections(header, fd, NULL, +	perf_header__process_sections(header, fd, &session->pevent,  				      perf_file_section__process);  	lseek(fd, header->data_offset, SEEK_SET); -	if (perf_evlist__set_tracepoint_names(session->evlist)) +	if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent))  		goto out_delete_evlist;  	header->frozen = 1; @@ -2419,8 +2422,8 @@ int perf_event__process_tracing_data(union perf_event *event,  	lseek(session->fd, offset + sizeof(struct tracing_data_event),  	      SEEK_SET); -	size_read = trace_report(session->fd, session->repipe); - +	size_read = trace_report(session->fd, &session->pevent, +				 session->repipe);  	padding = ALIGN(size_read, sizeof(u64)) - size_read;  	if (read(session->fd, buf, padding) < 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 34bb556d621..0b096c27a41 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -47,6 +47,7 @@ enum hist_column {  	HISTC_SYMBOL_TO,  	HISTC_DSO_FROM,  	HISTC_DSO_TO, +	HISTC_SRCLINE,  	HISTC_NR_COLS, /* Last entry */  }; diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 1eb804fd3fb..b6842c1d02a 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -108,4 +108,14 @@ int eprintf(int level,  #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)  #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. 
+ */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) +  #endif diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 81371bad4ef..c14c665d9a2 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -157,7 +157,7 @@ void machine__exit(struct machine *self);  void machine__delete(struct machine *self);  int machine__resolve_callchain(struct machine *machine, -			       struct perf_evsel *evsel, struct thread *thread, +			       struct thread *thread,  			       struct ip_callchain *chain,  			       struct symbol **parent);  int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 76b98e2a587..1b997d2b89c 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -181,6 +181,22 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)  	return 0;  } +static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", +			PERF_TYPE_BREAKPOINT == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); +	TEST_ASSERT_VAL("wrong bp_type", +		(HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type); +	TEST_ASSERT_VAL("wrong bp_len", +			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); +	return 0; +} +  static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)  {  	struct perf_evsel *evsel = list_entry(evlist->entries.next, @@ -309,6 +325,8 @@ static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist)  	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);  	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);  	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u"));  	return test__checkevent_breakpoint(evlist);  } @@ -322,6 +340,8 @@ static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist)  	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);  	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);  	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "mem:0x0:x:k"));  	return test__checkevent_breakpoint_x(evlist);  } @@ -335,6 +355,8 @@ static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist)  	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);  	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);  	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp"));  	return test__checkevent_breakpoint_r(evlist);  } @@ -348,10 +370,27 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist)  	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);  	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);  	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "mem:0x0:w:up"));  	return test__checkevent_breakpoint_w(evlist);  } +static int 
test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); +	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); +	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); +	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); + +	return test__checkevent_breakpoint_rw(evlist); +} +  static int test__checkevent_pmu(struct perf_evlist *evlist)  { @@ -413,19 +452,63 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)  {  	struct perf_evsel *evsel; -	/* cpu/config=1,name=krava1/u */ +	/* cpu/config=1,name=krava/u */  	evsel = list_entry(evlist->entries.next, struct perf_evsel, node);  	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);  	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);  	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config); -	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "krava")); +	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); -	/* cpu/config=2/" */ +	/* cpu/config=2/u" */  	evsel = list_entry(evsel->node.next, struct perf_evsel, node);  	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);  	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);  	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config); -	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "raw 0x2")); +	TEST_ASSERT_VAL("wrong name", +			!strcmp(perf_evsel__name(evsel), "raw 0x2:u")); + +	return 0; +} + +static int test__checkterms_simple(struct list_head *terms) +{ +	struct parse_events__term *term; + +	/* config=10 */ +	term = list_entry(terms->next, struct parse_events__term, list); +	TEST_ASSERT_VAL("wrong type term", +			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); +	TEST_ASSERT_VAL("wrong type val", +			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); +	TEST_ASSERT_VAL("wrong val", term->val.num == 10); +	TEST_ASSERT_VAL("wrong config", !term->config); + +	/* config1 */ +	term = list_entry(term->list.next, struct parse_events__term, list); +	TEST_ASSERT_VAL("wrong type term", +			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1); +	TEST_ASSERT_VAL("wrong type val", +			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); +	TEST_ASSERT_VAL("wrong val", term->val.num == 1); +	TEST_ASSERT_VAL("wrong config", !term->config); + +	/* config2=3 */ +	term = list_entry(term->list.next, struct parse_events__term, list); +	TEST_ASSERT_VAL("wrong type term", +			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2); +	TEST_ASSERT_VAL("wrong type val", +			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); +	TEST_ASSERT_VAL("wrong val", term->val.num == 3); +	TEST_ASSERT_VAL("wrong config", !term->config); + +	/* umask=1*/ +	term = list_entry(term->list.next, struct parse_events__term, list); +	TEST_ASSERT_VAL("wrong type term", +			term->type_term == PARSE_EVENTS__TERM_TYPE_USER); +	TEST_ASSERT_VAL("wrong type val", +			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); +	TEST_ASSERT_VAL("wrong val", term->val.num == 1); +	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));  	return 0;  } @@ -541,10 +624,16 @@ static struct test__event_st test__events[] = {  		.name  = "instructions:H",  		.check = test__checkevent_exclude_guest_modifier,  	}, +	[26] = { +		.name  = "mem:0:rw", +		.check = 
test__checkevent_breakpoint_rw, +	}, +	[27] = { +		.name  = "mem:0:rw:kp", +		.check = test__checkevent_breakpoint_rw_modifier, +	},  }; -#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) -  static struct test__event_st test__events_pmu[] = {  	[0] = {  		.name  = "cpu/config=10,config1,config2=3,period=1000/u", @@ -556,10 +645,23 @@ static struct test__event_st test__events_pmu[] = {  	},  }; -#define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \ -			      sizeof(struct test__event_st)) +struct test__term { +	const char *str; +	__u32 type; +	int (*check)(struct list_head *terms); +}; + +static struct test__term test__terms[] = { +	[0] = { +		.str   = "config=10,config1,config2=3,umask=1", +		.check = test__checkterms_simple, +	}, +}; -static int test(struct test__event_st *e) +#define TEST__TERMS_CNT (sizeof(test__terms) / \ +			 sizeof(struct test__term)) + +static int test_event(struct test__event_st *e)  {  	struct perf_evlist *evlist;  	int ret; @@ -590,7 +692,48 @@ static int test_events(struct test__event_st *events, unsigned cnt)  		struct test__event_st *e = &events[i];  		pr_debug("running test %d '%s'\n", i, e->name); -		ret = test(e); +		ret = test_event(e); +		if (ret) +			break; +	} + +	return ret; +} + +static int test_term(struct test__term *t) +{ +	struct list_head *terms; +	int ret; + +	terms = malloc(sizeof(*terms)); +	if (!terms) +		return -ENOMEM; + +	INIT_LIST_HEAD(terms); + +	ret = parse_events_terms(terms, t->str); +	if (ret) { +		pr_debug("failed to parse terms '%s', err %d\n", +			 t->str , ret); +		return ret; +	} + +	ret = t->check(terms); +	parse_events__free_terms(terms); + +	return ret; +} + +static int test_terms(struct test__term *terms, unsigned cnt) +{ +	int ret = 0; +	unsigned i; + +	for (i = 0; i < cnt; i++) { +		struct test__term *t = &terms[i]; + +		pr_debug("running test %d '%s'\n", i, t->str); +		ret = test_term(t);  		if (ret)  			break;  	} @@ -617,9 +760,17 @@ int parse_events__test(void)  {  	int ret; -	ret = test_events(test__events, TEST__EVENTS_CNT); -	if (!ret && test_pmu()) -		ret = test_events(test__events_pmu, TEST__EVENTS_PMU_CNT); +#define TEST_EVENTS(tests)				\ +do {							\ +	ret = test_events(tests, ARRAY_SIZE(tests));	\ +	if (ret)					\ +		return ret;				\ +} while (0) -	return ret; +	TEST_EVENTS(test__events); + +	if (test_pmu()) +		TEST_EVENTS(test__events_pmu); + +	return test_terms(test__terms, ARRAY_SIZE(test__terms));  } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 05dbc8b3c76..1aa721d7c10 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -11,14 +11,14 @@  #include "cache.h"  #include "header.h"  #include "debugfs.h" +#include "parse-events-bison.h" +#define YY_EXTRA_TYPE int  #include "parse-events-flex.h"  #include "pmu.h"  #define MAX_NAME_LEN 100  struct event_symbol { -	u8		type; -	u64		config;  	const char	*symbol;  	const char	*alias;  }; @@ -26,32 +26,88 @@ struct event_symbol {  #ifdef PARSER_DEBUG  extern int parse_events_debug;  #endif -int parse_events_parse(struct list_head *list, int *idx); +int parse_events_parse(void *data, void *scanner); -#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x -#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x - -static struct event_symbol event_symbols[] = { -  { CHW(CPU_CYCLES),			"cpu-cycles",			"cycles"		}, -  { CHW(STALLED_CYCLES_FRONTEND),	"stalled-cycles-frontend",	"idle-cycles-frontend"	}, -  { CHW(STALLED_CYCLES_BACKEND),	
"stalled-cycles-backend",	"idle-cycles-backend"	}, -  { CHW(INSTRUCTIONS),			"instructions",			""			}, -  { CHW(CACHE_REFERENCES),		"cache-references",		""			}, -  { CHW(CACHE_MISSES),			"cache-misses",			""			}, -  { CHW(BRANCH_INSTRUCTIONS),		"branch-instructions",		"branches"		}, -  { CHW(BRANCH_MISSES),			"branch-misses",		""			}, -  { CHW(BUS_CYCLES),			"bus-cycles",			""			}, -  { CHW(REF_CPU_CYCLES),		"ref-cycles",			""			}, +static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +	[PERF_COUNT_HW_CPU_CYCLES] = { +		.symbol = "cpu-cycles", +		.alias  = "cycles", +	}, +	[PERF_COUNT_HW_INSTRUCTIONS] = { +		.symbol = "instructions", +		.alias  = "", +	}, +	[PERF_COUNT_HW_CACHE_REFERENCES] = { +		.symbol = "cache-references", +		.alias  = "", +	}, +	[PERF_COUNT_HW_CACHE_MISSES] = { +		.symbol = "cache-misses", +		.alias  = "", +	}, +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { +		.symbol = "branch-instructions", +		.alias  = "branches", +	}, +	[PERF_COUNT_HW_BRANCH_MISSES] = { +		.symbol = "branch-misses", +		.alias  = "", +	}, +	[PERF_COUNT_HW_BUS_CYCLES] = { +		.symbol = "bus-cycles", +		.alias  = "", +	}, +	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { +		.symbol = "stalled-cycles-frontend", +		.alias  = "idle-cycles-frontend", +	}, +	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { +		.symbol = "stalled-cycles-backend", +		.alias  = "idle-cycles-backend", +	}, +	[PERF_COUNT_HW_REF_CPU_CYCLES] = { +		.symbol = "ref-cycles", +		.alias  = "", +	}, +}; -  { CSW(CPU_CLOCK),			"cpu-clock",			""			}, -  { CSW(TASK_CLOCK),			"task-clock",			""			}, -  { CSW(PAGE_FAULTS),			"page-faults",			"faults"		}, -  { CSW(PAGE_FAULTS_MIN),		"minor-faults",			""			}, -  { CSW(PAGE_FAULTS_MAJ),		"major-faults",			""			}, -  { CSW(CONTEXT_SWITCHES),		"context-switches",		"cs"			}, -  { CSW(CPU_MIGRATIONS),		"cpu-migrations",		"migrations"		}, -  { CSW(ALIGNMENT_FAULTS),		"alignment-faults",		""			}, -  { CSW(EMULATION_FAULTS),		"emulation-faults",		""			}, +static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +	[PERF_COUNT_SW_CPU_CLOCK] = { +		.symbol = "cpu-clock", +		.alias  = "", +	}, +	[PERF_COUNT_SW_TASK_CLOCK] = { +		.symbol = "task-clock", +		.alias  = "", +	}, +	[PERF_COUNT_SW_PAGE_FAULTS] = { +		.symbol = "page-faults", +		.alias  = "faults", +	}, +	[PERF_COUNT_SW_CONTEXT_SWITCHES] = { +		.symbol = "context-switches", +		.alias  = "cs", +	}, +	[PERF_COUNT_SW_CPU_MIGRATIONS] = { +		.symbol = "cpu-migrations", +		.alias  = "migrations", +	}, +	[PERF_COUNT_SW_PAGE_FAULTS_MIN] = { +		.symbol = "minor-faults", +		.alias  = "", +	}, +	[PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { +		.symbol = "major-faults", +		.alias  = "", +	}, +	[PERF_COUNT_SW_ALIGNMENT_FAULTS] = { +		.symbol = "alignment-faults", +		.alias  = "", +	}, +	[PERF_COUNT_SW_EMULATION_FAULTS] = { +		.symbol = "emulation-faults", +		.alias  = "", +	},  };  #define __PERF_EVENT_FIELD(config, name) \ @@ -62,63 +118,6 @@ static struct event_symbol event_symbols[] = {  #define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)  #define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT) -static const char *sw_event_names[PERF_COUNT_SW_MAX] = { -	"cpu-clock", -	"task-clock", -	"page-faults", -	"context-switches", -	"CPU-migrations", -	"minor-faults", -	"major-faults", -	"alignment-faults", -	"emulation-faults", -}; - -#define MAX_ALIASES 8 - -static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = { - { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		}, - { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	}, - { "LLC",	
"L2",							}, - { "dTLB",	"d-tlb",	"Data-TLB",				}, - { "iTLB",	"i-tlb",	"Instruction-TLB",			}, - { "branch",	"branches",	"bpu",		"btb",		"bpc",	}, - { "node",								}, -}; - -static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = { - { "load",	"loads",	"read",					}, - { "store",	"stores",	"write",				}, - { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	}, -}; - -static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] -				  [MAX_ALIASES] = { - { "refs",	"Reference",	"ops",		"access",		}, - { "misses",	"miss",							}, -}; - -#define C(x)		PERF_COUNT_HW_CACHE_##x -#define CACHE_READ	(1 << C(OP_READ)) -#define CACHE_WRITE	(1 << C(OP_WRITE)) -#define CACHE_PREFETCH	(1 << C(OP_PREFETCH)) -#define COP(x)		(1 << x) - -/* - * cache operartion stat - * L1I : Read and prefetch only - * ITLB and BPU : Read-only - */ -static unsigned long hw_cache_stat[C(MAX)] = { - [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH), - [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(ITLB)]	= (CACHE_READ), - [C(BPU)]	= (CACHE_READ), - [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), -}; -  #define for_each_subsystem(sys_dir, sys_dirent, sys_next)	       \  	while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)	       \  	if (sys_dirent.d_type == DT_DIR &&				       \ @@ -218,48 +217,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)  	return NULL;  } -#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1) -static const char *tracepoint_id_to_name(u64 config) -{ -	static char buf[TP_PATH_LEN]; -	struct tracepoint_path *path; - -	path = tracepoint_id_to_path(config); -	if (path) { -		snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name); -		free(path->name); -		free(path->system); -		free(path); -	} else -		snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown"); - -	return buf; -} - -static int is_cache_op_valid(u8 cache_type, u8 cache_op) -{ -	if (hw_cache_stat[cache_type] & COP(cache_op)) -		return 1;	/* valid */ -	else -		return 0;	/* invalid */ -} - -static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) -{ -	static char name[50]; - -	if (cache_result) { -		sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], -			hw_cache_op[cache_op][0], -			hw_cache_result[cache_result][0]); -	} else { -		sprintf(name, "%s-%s", hw_cache[cache_type][0], -			hw_cache_op[cache_op][1]); -	} - -	return name; -} -  const char *event_type(int type)  {  	switch (type) { @@ -282,76 +239,6 @@ const char *event_type(int type)  	return "unknown";  } -const char *event_name(struct perf_evsel *evsel) -{ -	u64 config = evsel->attr.config; -	int type = evsel->attr.type; - -	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE) { -		/* - 		 * XXX minimal fix, see comment on perf_evsen__name, this static buffer - 		 * will go away together with event_name in the next devel cycle. 
- 		 */ -		static char bf[128]; -		perf_evsel__name(evsel, bf, sizeof(bf)); -		return bf; -	} - -	if (evsel->name) -		return evsel->name; - -	return __event_name(type, config); -} - -const char *__event_name(int type, u64 config) -{ -	static char buf[32]; - -	if (type == PERF_TYPE_RAW) { -		sprintf(buf, "raw 0x%" PRIx64, config); -		return buf; -	} - -	switch (type) { -	case PERF_TYPE_HARDWARE: -		return __perf_evsel__hw_name(config); - -	case PERF_TYPE_HW_CACHE: { -		u8 cache_type, cache_op, cache_result; - -		cache_type   = (config >>  0) & 0xff; -		if (cache_type > PERF_COUNT_HW_CACHE_MAX) -			return "unknown-ext-hardware-cache-type"; - -		cache_op     = (config >>  8) & 0xff; -		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX) -			return "unknown-ext-hardware-cache-op"; - -		cache_result = (config >> 16) & 0xff; -		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) -			return "unknown-ext-hardware-cache-result"; - -		if (!is_cache_op_valid(cache_type, cache_op)) -			return "invalid-cache"; - -		return event_cache_name(cache_type, cache_op, cache_result); -	} - -	case PERF_TYPE_SOFTWARE: -		if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) -			return sw_event_names[config]; -		return "unknown-software"; - -	case PERF_TYPE_TRACEPOINT: -		return tracepoint_id_to_name(config); - -	default: -		break; -	} - -	return "unknown"; -} -  static int add_event(struct list_head **_list, int *idx,  		     struct perf_event_attr *attr, char *name)  { @@ -373,19 +260,20 @@ static int add_event(struct list_head **_list, int *idx,  		return -ENOMEM;  	} -	evsel->name = strdup(name); +	if (name) +		evsel->name = strdup(name);  	list_add_tail(&evsel->node, list);  	*_list = list;  	return 0;  } -static int parse_aliases(char *str, const char *names[][MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)  {  	int i, j;  	int n, longest = -1;  	for (i = 0; i < size; i++) { -		for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { +		for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {  			n = strlen(names[i][j]);  			if (n > longest && !strncasecmp(str, names[i][j], n))  				longest = n; @@ -410,7 +298,7 @@ int parse_events_add_cache(struct list_head **list, int *idx,  	 * No fallback - if we cannot get a clear cache type  	 * then bail out:  	 */ -	cache_type = parse_aliases(type, hw_cache, +	cache_type = parse_aliases(type, perf_evsel__hw_cache,  				   PERF_COUNT_HW_CACHE_MAX);  	if (cache_type == -1)  		return -EINVAL; @@ -423,18 +311,18 @@ int parse_events_add_cache(struct list_head **list, int *idx,  		snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str);  		if (cache_op == -1) { -			cache_op = parse_aliases(str, hw_cache_op, +			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,  						 PERF_COUNT_HW_CACHE_OP_MAX);  			if (cache_op >= 0) { -				if (!is_cache_op_valid(cache_type, cache_op)) +				if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))  					return -EINVAL;  				continue;  			}  		}  		if (cache_result == -1) { -			cache_result = parse_aliases(str, hw_cache_result, -						PERF_COUNT_HW_CACHE_RESULT_MAX); +			cache_result = parse_aliases(str, perf_evsel__hw_cache_result, +						     PERF_COUNT_HW_CACHE_RESULT_MAX);  			if (cache_result >= 0)  				continue;  		} @@ -549,21 +437,31 @@ parse_breakpoint_type(const char *type, struct perf_event_attr *attr)  		if (!type || !type[i])  			break; +#define CHECK_SET_TYPE(bit)		\ +do {					\ +	if (attr->bp_type & bit)	\ +		return -EINVAL;		\ +	else				\ +		attr->bp_type |= 
bit;	\ +} while (0) +  		switch (type[i]) {  		case 'r': -			attr->bp_type |= HW_BREAKPOINT_R; +			CHECK_SET_TYPE(HW_BREAKPOINT_R);  			break;  		case 'w': -			attr->bp_type |= HW_BREAKPOINT_W; +			CHECK_SET_TYPE(HW_BREAKPOINT_W);  			break;  		case 'x': -			attr->bp_type |= HW_BREAKPOINT_X; +			CHECK_SET_TYPE(HW_BREAKPOINT_X);  			break;  		default:  			return -EINVAL;  		}  	} +#undef CHECK_SET_TYPE +  	if (!attr->bp_type) /* Default */  		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; @@ -574,7 +472,6 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,  				void *ptr, char *type)  {  	struct perf_event_attr attr; -	char name[MAX_NAME_LEN];  	memset(&attr, 0, sizeof(attr));  	attr.bp_addr = (unsigned long) ptr; @@ -593,8 +490,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,  	attr.type = PERF_TYPE_BREAKPOINT; -	snprintf(name, MAX_NAME_LEN, "mem:%p:%s", ptr, type ? type : "rw"); -	return add_event(list, idx, &attr, name); +	return add_event(list, idx, &attr, NULL);  }  static int config_term(struct perf_event_attr *attr, @@ -666,8 +562,7 @@ int parse_events_add_numeric(struct list_head **list, int *idx,  	    config_attr(&attr, head_config, 1))  		return -EINVAL; -	return add_event(list, idx, &attr, -			 (char *) __event_name(type, config)); +	return add_event(list, idx, &attr, NULL);  }  static int parse_events__is_name_term(struct parse_events__term *term) @@ -675,8 +570,7 @@ static int parse_events__is_name_term(struct parse_events__term *term)  	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;  } -static char *pmu_event_name(struct perf_event_attr *attr, -			    struct list_head *head_terms) +static char *pmu_event_name(struct list_head *head_terms)  {  	struct parse_events__term *term; @@ -684,7 +578,7 @@ static char *pmu_event_name(struct perf_event_attr *attr,  		if (parse_events__is_name_term(term))  			return term->val.str; -	return (char *) __event_name(PERF_TYPE_RAW, attr->config); +	return NULL;  }  int parse_events_add_pmu(struct list_head **list, int *idx, @@ -699,6 +593,9 @@ int parse_events_add_pmu(struct list_head **list, int *idx,  	memset(&attr, 0, sizeof(attr)); +	if (perf_pmu__check_alias(pmu, head_config)) +		return -EINVAL; +  	/*  	 * Configure hardcoded terms first, no need to check  	 * return value when called with fail == 0 ;) @@ -709,7 +606,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,  		return -EINVAL;  	return add_event(list, idx, &attr, -			 pmu_event_name(&attr, head_config)); +			 pmu_event_name(head_config));  }  void parse_events_update_lists(struct list_head *list_event, @@ -787,27 +684,62 @@ int parse_events_modifier(struct list_head *list, char *str)  	return 0;  } -int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +static int parse_events__scanner(const char *str, void *data, int start_token)  { -	LIST_HEAD(list); -	LIST_HEAD(list_tmp);  	YY_BUFFER_STATE buffer; -	int ret, idx = evlist->nr_entries; +	void *scanner; +	int ret; -	buffer = parse_events__scan_string(str); +	ret = parse_events_lex_init_extra(start_token, &scanner); +	if (ret) +		return ret; + +	buffer = parse_events__scan_string(str, scanner);  #ifdef PARSER_DEBUG  	parse_events_debug = 1;  #endif -	ret = parse_events_parse(&list, &idx); +	ret = parse_events_parse(data, scanner); + +	parse_events__flush_buffer(buffer, scanner); +	parse_events__delete_buffer(buffer, scanner); +	parse_events_lex_destroy(scanner); +	return ret; +} + +/* + * parse event config string, return a list of event 
terms. + */ +int parse_events_terms(struct list_head *terms, const char *str) +{ +	struct parse_events_data__terms data = { +		.terms = NULL, +	}; +	int ret; -	parse_events__flush_buffer(buffer); -	parse_events__delete_buffer(buffer); -	parse_events_lex_destroy(); +	ret = parse_events__scanner(str, &data, PE_START_TERMS); +	if (!ret) { +		list_splice(data.terms, terms); +		free(data.terms); +		return 0; +	} +	parse_events__free_terms(data.terms); +	return ret; +} + +int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +{ +	struct parse_events_data__events data = { +		.list = LIST_HEAD_INIT(data.list), +		.idx  = evlist->nr_entries, +	}; +	int ret; + +	ret = parse_events__scanner(str, &data, PE_START_EVENTS);  	if (!ret) { -		int entries = idx - evlist->nr_entries; -		perf_evlist__splice_list_tail(evlist, &list, entries); +		int entries = data.idx - evlist->nr_entries; +		perf_evlist__splice_list_tail(evlist, &data.list, entries);  		return 0;  	} @@ -946,16 +878,13 @@ int is_valid_tracepoint(const char *event_string)  	return 0;  } -void print_events_type(u8 type) +static void __print_events_type(u8 type, struct event_symbol *syms, +				unsigned max)  { -	struct event_symbol *syms = event_symbols; -	unsigned int i;  	char name[64]; +	unsigned i; -	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { -		if (type != syms->type) -			continue; - +	for (i = 0; i < max ; i++, syms++) {  		if (strlen(syms->alias))  			snprintf(name, sizeof(name),  "%s OR %s",  				 syms->symbol, syms->alias); @@ -967,19 +896,28 @@ void print_events_type(u8 type)  	}  } +void print_events_type(u8 type) +{ +	if (type == PERF_TYPE_SOFTWARE) +		__print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); +	else +		__print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); +} +  int print_hwcache_events(const char *event_glob)  {  	unsigned int type, op, i, printed = 0; +	char name[64];  	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {  		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {  			/* skip invalid cache type */ -			if (!is_cache_op_valid(type, op)) +			if (!perf_evsel__is_cache_op_valid(type, op))  				continue;  			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { -				char *name = event_cache_name(type, op, i); - +				__perf_evsel__hw_cache_type_op_res_name(type, op, i, +									name, sizeof(name));  				if (event_glob != NULL && !strglobmatch(name, event_glob))  					continue; @@ -993,26 +931,13 @@ int print_hwcache_events(const char *event_glob)  	return printed;  } -/* - * Print the help text for the event symbols: - */ -void print_events(const char *event_glob) +static void print_symbol_events(const char *event_glob, unsigned type, +				struct event_symbol *syms, unsigned max)  { -	unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; -	struct event_symbol *syms = event_symbols; +	unsigned i, printed = 0;  	char name[MAX_NAME_LEN]; -	printf("\n"); -	printf("List of pre-defined events (to be used in -e):\n"); - -	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { -		type = syms->type; - -		if (type != prev_type && printed) { -			printf("\n"); -			printed = 0; -			ntypes_printed++; -		} +	for (i = 0; i < max; i++, syms++) {  		if (event_glob != NULL &&   		    !(strglobmatch(syms->symbol, event_glob) || @@ -1023,17 +948,31 @@ void print_events(const char *event_glob)  			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);  		else  			strncpy(name, syms->symbol, MAX_NAME_LEN); -		printf("  %-50s [%s]\n", 
name, -			event_type_descriptors[type]); -		prev_type = type; -		++printed; +		printf("  %-50s [%s]\n", name, event_type_descriptors[type]); + +		printed++;  	} -	if (ntypes_printed) { -		printed = 0; +	if (printed)  		printf("\n"); -	} +} + +/* + * Print the help text for the event symbols: + */ +void print_events(const char *event_glob) +{ + +	printf("\n"); +	printf("List of pre-defined events (to be used in -e):\n"); + +	print_symbol_events(event_glob, PERF_TYPE_HARDWARE, +			    event_symbols_hw, PERF_COUNT_HW_MAX); + +	print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, +			    event_symbols_sw, PERF_COUNT_SW_MAX); +  	print_hwcache_events(event_glob);  	if (event_glob != NULL) @@ -1106,6 +1045,13 @@ int parse_events__term_str(struct parse_events__term **term,  			config, str, 0);  } +int parse_events__term_clone(struct parse_events__term **new, +			     struct parse_events__term *term) +{ +	return new_term(new, term->type_val, term->type_term, term->config, +			term->val.str, term->val.num); +} +  void parse_events__free_terms(struct list_head *terms)  {  	struct parse_events__term *term, *h; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8cac57ab4ee..ee9c218a193 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -26,13 +26,12 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);  extern bool have_tracepoints(struct list_head *evlist);  const char *event_type(int type); -const char *event_name(struct perf_evsel *event); -extern const char *__event_name(int type, u64 config);  extern int parse_events_option(const struct option *opt, const char *str,  			       int unset);  extern int parse_events(struct perf_evlist *evlist, const char *str,  			int unset); +extern int parse_events_terms(struct list_head *terms, const char *str);  extern int parse_filter(const struct option *opt, const char *str, int unset);  #define EVENTS_HELP_MAX (128*1024) @@ -63,11 +62,22 @@ struct parse_events__term {  	struct list_head list;  }; +struct parse_events_data__events { +	struct list_head list; +	int idx; +}; + +struct parse_events_data__terms { +	struct list_head *terms; +}; +  int parse_events__is_hardcoded_term(struct parse_events__term *term);  int parse_events__term_num(struct parse_events__term **_term,  			   int type_term, char *config, long num);  int parse_events__term_str(struct parse_events__term **_term,  			   int type_term, char *config, char *str); +int parse_events__term_clone(struct parse_events__term **new, +			     struct parse_events__term *term);  void parse_events__free_terms(struct list_head *terms);  int parse_events_modifier(struct list_head *list, char *str);  int parse_events_add_tracepoint(struct list_head **list, int *idx, @@ -83,8 +93,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,  			 char *pmu , struct list_head *head_config);  void parse_events_update_lists(struct list_head *list_event,  			       struct list_head *list_all); -void parse_events_error(struct list_head *list_all, -			int *idx, char const *msg); +void parse_events_error(void *data, void *scanner, char const *msg);  int parse_events__test(void);  void print_events(const char *event_glob); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 618a8e78839..384ca74c6b2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -1,4 +1,6 @@ +%option reentrant +%option bison-bridge  %option prefix="parse_events_"  %option stack @@ -8,7 +10,10 @@  #include 
"parse-events-bison.h"  #include "parse-events.h" -static int __value(char *str, int base, int token) +char *parse_events_get_text(yyscan_t yyscanner); +YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token)  {  	long num; @@ -17,35 +22,48 @@ static int __value(char *str, int base, int token)  	if (errno)  		return PE_ERROR; -	parse_events_lval.num = num; +	yylval->num = num;  	return token;  } -static int value(int base) +static int value(yyscan_t scanner, int base)  { -	return __value(parse_events_text, base, PE_VALUE); +	YYSTYPE *yylval = parse_events_get_lval(scanner); +	char *text = parse_events_get_text(scanner); + +	return __value(yylval, text, base, PE_VALUE);  } -static int raw(void) +static int raw(yyscan_t scanner)  { -	return __value(parse_events_text + 1, 16, PE_RAW); +	YYSTYPE *yylval = parse_events_get_lval(scanner); +	char *text = parse_events_get_text(scanner); + +	return __value(yylval, text + 1, 16, PE_RAW);  } -static int str(int token) +static int str(yyscan_t scanner, int token)  { -	parse_events_lval.str = strdup(parse_events_text); +	YYSTYPE *yylval = parse_events_get_lval(scanner); +	char *text = parse_events_get_text(scanner); + +	yylval->str = strdup(text);  	return token;  } -static int sym(int type, int config) +static int sym(yyscan_t scanner, int type, int config)  { -	parse_events_lval.num = (type << 16) + config; -	return PE_VALUE_SYM; +	YYSTYPE *yylval = parse_events_get_lval(scanner); + +	yylval->num = (type << 16) + config; +	return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;  } -static int term(int type) +static int term(yyscan_t scanner, int type)  { -	parse_events_lval.num = type; +	YYSTYPE *yylval = parse_events_get_lval(scanner); + +	yylval->num = type;  	return PE_TERM;  } @@ -58,28 +76,41 @@ num_hex		0x[a-fA-F0-9]+  num_raw_hex	[a-fA-F0-9]+  name		[a-zA-Z_*?][a-zA-Z0-9_*?]*  modifier_event	[ukhpGH]{1,8} -modifier_bp	[rwx] +modifier_bp	[rwx]{1,3}  %% -cpu-cycles|cycles				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches			{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } -cpu-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } -task-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } -page-faults|faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } -minor-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } -major-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } -context-switches|cs				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } -cpu-migrations|migrations			{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } -alignment-faults				
{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } -emulation-faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } + +%{ +	{ +		int start_token; + +		start_token = (int) parse_events_get_extra(yyscanner); +		if (start_token) { +			parse_events_set_extra(NULL, yyscanner); +			return start_token; +		} +         } +%} + +cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches			{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } +cpu-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } +task-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } +page-faults|faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } +minor-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } +major-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } +context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } +cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } +alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } +emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }  L1-dcache|l1-d|l1d|L1-data		|  L1-icache|l1-i|l1i|L1-instruction	| @@ -87,14 +118,14 @@ LLC|L2					|  dTLB|d-tlb|Data-TLB			|  iTLB|i-tlb|Instruction-TLB		|  branch|branches|bpu|btb|bpc		| -node					{ return str(PE_NAME_CACHE_TYPE); } +node					{ return str(yyscanner, PE_NAME_CACHE_TYPE); }  load|loads|read				|  store|stores|write			|  prefetch|prefetches			|  speculative-read|speculative-load	|  refs|Reference|ops|access		| -misses|miss				{ return str(PE_NAME_CACHE_OP_RESULT); } +misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }  	/*  	 * These are event config hardcoded term names to be specified @@ -102,38 +133,39 @@ misses|miss				{ return str(PE_NAME_CACHE_OP_RESULT); }  	 * so we can put them here directly. In case the we have a conflict  	 * in future, this needs to go into '//' condition block.  	 
*/ -config			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG); } -config1			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG1); } -config2			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG2); } -name			{ return term(PARSE_EVENTS__TERM_TYPE_NAME); } -period			{ return term(PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } -branch_type		{ return term(PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } +config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } +config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } +name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } +period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }  mem:			{ BEGIN(mem); return PE_PREFIX_MEM; } -r{num_raw_hex}		{ return raw(); } -{num_dec}		{ return value(10); } -{num_hex}		{ return value(16); } +r{num_raw_hex}		{ return raw(yyscanner); } +{num_dec}		{ return value(yyscanner, 10); } +{num_hex}		{ return value(yyscanner, 16); } -{modifier_event}	{ return str(PE_MODIFIER_EVENT); } -{name}			{ return str(PE_NAME); } +{modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); } +{name}			{ return str(yyscanner, PE_NAME); }  "/"			{ return '/'; }  -			{ return '-'; }  ,			{ return ','; }  :			{ return ':'; }  =			{ return '='; } +\n			{ }  <mem>{ -{modifier_bp}		{ return str(PE_MODIFIER_BP); } +{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }  :			{ return ':'; } -{num_dec}		{ return value(10); } -{num_hex}		{ return value(16); } +{num_dec}		{ return value(yyscanner, 10); } +{num_hex}		{ return value(yyscanner, 16); }  	/*  	 * We need to separate 'mem:' scanner part, in order to get specific  	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME  	 * and we'd need to parse it manually. During the escape from <mem>  	 * state we need to put the escaping char back, so we dont miss it.  	 */ -.			{ unput(*parse_events_text); BEGIN(INITIAL); } +.			{ unput(*yytext); BEGIN(INITIAL); }  	/*  	 * We destroy the scanner after reaching EOF,  	 * but anyway just to be sure get back to INIT state. 
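The reentrant scanner above, paired with the PE_START_* token stashed in the scanner's extra data, is what lets a single lexer/parser pair serve two entry points. A minimal usage sketch of the terms entry point, assuming the caller owns the resulting list (parse_alias_string and spec are hypothetical names; error handling trimmed):

	/* sketch: consume a term list such as "config=10,umask=1" */
	static int parse_alias_string(const char *spec)
	{
		LIST_HEAD(terms);	/* struct parse_events__term nodes */
		struct parse_events__term *term;
		int ret;

		ret = parse_events_terms(&terms, spec);
		if (ret)
			return ret;	/* scanner or parser error */

		list_for_each_entry(term, &terms, list) {
			/* inspect term->config plus term->val.num or
			 * term->val.str, depending on term->type_val */
		}

		parse_events__free_terms(&terms);
		return 0;
	}
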
@@ -143,7 +175,7 @@ r{num_raw_hex}		{ return raw(); }  %% -int parse_events_wrap(void) +int parse_events_wrap(void *scanner __used)  {  	return 1;  } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 362cc59332a..2bc5fbff2b5 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,7 +1,8 @@ - +%pure-parser  %name-prefix "parse_events_" -%parse-param {struct list_head *list_all} -%parse-param {int *idx} +%parse-param {void *_data} +%parse-param {void *scanner} +%lex-param {void* scanner}  %{ @@ -12,8 +13,9 @@  #include "types.h"  #include "util.h"  #include "parse-events.h" +#include "parse-events-bison.h" -extern int parse_events_lex (void); +extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);  #define ABORT_ON(val) \  do { \ @@ -23,14 +25,16 @@ do { \  %} -%token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM +%token PE_START_EVENTS PE_START_TERMS +%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM  %token PE_NAME  %token PE_MODIFIER_EVENT PE_MODIFIER_BP  %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT  %token PE_PREFIX_MEM PE_PREFIX_RAW  %token PE_ERROR  %type <num> PE_VALUE -%type <num> PE_VALUE_SYM +%type <num> PE_VALUE_SYM_HW +%type <num> PE_VALUE_SYM_SW  %type <num> PE_RAW  %type <num> PE_TERM  %type <str> PE_NAME @@ -38,6 +42,7 @@ do { \  %type <str> PE_NAME_CACHE_OP_RESULT  %type <str> PE_MODIFIER_EVENT  %type <str> PE_MODIFIER_BP +%type <num> value_sym  %type <head> event_config  %type <term> event_term  %type <head> event_pmu @@ -58,24 +63,33 @@ do { \  }  %% +start: +PE_START_EVENTS events +| +PE_START_TERMS  terms +  events:  events ',' event | event  event:  event_def PE_MODIFIER_EVENT  { +	struct parse_events_data__events *data = _data; +  	/*  	 * Apply modifier on all events added by single event definition  	 * (there could be more events added for multiple tracepoint  	 * definitions via '*?'.  	 
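	 * e.g. a glob like 'sched:sched_*' adds one event per matching
	 * tracepoint, so the modifier has to be applied to all of them).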
*/  	ABORT_ON(parse_events_modifier($1, $2)); -	parse_events_update_lists($1, list_all); +	parse_events_update_lists($1, &data->list);  }  |  event_def  { -	parse_events_update_lists($1, list_all); +	struct parse_events_data__events *data = _data; + +	parse_events_update_lists($1, &data->list);  }  event_def: event_pmu | @@ -89,104 +103,131 @@ event_def: event_pmu |  event_pmu:  PE_NAME '/' event_config '/'  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_pmu(&list, idx, $1, $3)); +	ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));  	parse_events__free_terms($3);  	$$ = list;  } +value_sym: +PE_VALUE_SYM_HW +| +PE_VALUE_SYM_SW +  event_legacy_symbol: -PE_VALUE_SYM '/' event_config '/' +value_sym '/' event_config '/'  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL;  	int type = $1 >> 16;  	int config = $1 & 255; -	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, $3)); +	ABORT_ON(parse_events_add_numeric(&list, &data->idx, +					  type, config, $3));  	parse_events__free_terms($3);  	$$ = list;  }  | -PE_VALUE_SYM sep_slash_dc +value_sym sep_slash_dc  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL;  	int type = $1 >> 16;  	int config = $1 & 255; -	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, NULL)); +	ABORT_ON(parse_events_add_numeric(&list, &data->idx, +					  type, config, NULL));  	$$ = list;  }  event_legacy_cache:  PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, $5)); +	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));  	$$ = list;  }  |  PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, NULL)); +	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));  	$$ = list;  }  |  PE_NAME_CACHE_TYPE  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_cache(&list, idx, $1, NULL, NULL)); +	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));  	$$ = list;  }  event_legacy_mem:  PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, $4)); +	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, +					     (void *) $2, $4));  	$$ = list;  }  |  PE_PREFIX_MEM PE_VALUE sep_dc  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, NULL)); +	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, +					     (void *) $2, NULL));  	$$ = list;  }  event_legacy_tracepoint:  PE_NAME ':' PE_NAME  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_tracepoint(&list, idx, $1, $3)); +	ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));  	$$ = list;  }  event_legacy_numeric:  PE_VALUE ':' PE_VALUE  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_numeric(&list, idx, $1, $3, NULL)); +	ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL));  	$$ = list;  }  
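With %pure-parser in effect the grammar keeps no global state: each action recovers what it needs from the opaque _data pointer, whose concrete type is fixed by the start token. An illustrative pairing using the names introduced in this patch (parse_events__scanner() itself is file-local to parse-events.c, so this is a sketch rather than client code, and it assumes str and evlist are in scope):

	/* PE_START_EVENTS: _data is a struct parse_events_data__events */
	struct parse_events_data__events events = {
		.list = LIST_HEAD_INIT(events.list),
		.idx  = evlist->nr_entries,
	};
	parse_events__scanner(str, &events, PE_START_EVENTS);

	/* PE_START_TERMS: _data is a struct parse_events_data__terms */
	struct parse_events_data__terms terms = { .terms = NULL };
	parse_events__scanner(str, &terms, PE_START_TERMS);
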
event_legacy_raw:  PE_RAW  { +	struct parse_events_data__events *data = _data;  	struct list_head *list = NULL; -	ABORT_ON(parse_events_add_numeric(&list, idx, PERF_TYPE_RAW, $1, NULL)); +	ABORT_ON(parse_events_add_numeric(&list, &data->idx, +					  PERF_TYPE_RAW, $1, NULL));  	$$ = list;  } +terms: event_config +{ +	struct parse_events_data__terms *data = _data; +	data->terms = $1; +} +  event_config:  event_config ',' event_term  { @@ -267,8 +308,7 @@ sep_slash_dc: '/' | ':' |  %% -void parse_events_error(struct list_head *list_all __used, -			int *idx __used, +void parse_events_error(void *data __used, void *scanner __used,  			char const *msg __used)  {  } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a119a537169..67715a42cd6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -72,7 +72,7 @@ static int pmu_format(char *name, struct list_head *format)  		 "%s/bus/event_source/devices/%s/format", sysfs, name);  	if (stat(path, &st) < 0) -		return -1; +		return 0;	/* no error if format does not exist */  	if (pmu_format_parse(path, format))  		return -1; @@ -80,6 +80,114 @@ static int pmu_format(char *name, struct list_head *format)  	return 0;  } +static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +{ +	struct perf_pmu__alias *alias; +	char buf[256]; +	int ret; + +	ret = fread(buf, 1, sizeof(buf), file); +	if (ret == 0) +		return -EINVAL; +	buf[ret] = 0; + +	alias = malloc(sizeof(*alias)); +	if (!alias) +		return -ENOMEM; + +	INIT_LIST_HEAD(&alias->terms); +	ret = parse_events_terms(&alias->terms, buf); +	if (ret) { +		free(alias); +		return ret; +	} + +	alias->name = strdup(name); +	list_add_tail(&alias->list, list); +	return 0; +} + +/* + * Process all the sysfs attributes located under the directory + * specified in 'dir' parameter. + */ +static int pmu_aliases_parse(char *dir, struct list_head *head) +{ +	struct dirent *evt_ent; +	DIR *event_dir; +	int ret = 0; + +	event_dir = opendir(dir); +	if (!event_dir) +		return -EINVAL; + +	while (!ret && (evt_ent = readdir(event_dir))) { +		char path[PATH_MAX]; +		char *name = evt_ent->d_name; +		FILE *file; + +		if (!strcmp(name, ".") || !strcmp(name, "..")) +			continue; + +		snprintf(path, PATH_MAX, "%s/%s", dir, name); + +		ret = -EINVAL; +		file = fopen(path, "r"); +		if (!file) +			break; +		ret = perf_pmu__new_alias(head, name, file); +		fclose(file); +	} + +	closedir(event_dir); +	return ret; +} + +/* + * Reading the pmu event aliases definition, which should be located at: + * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes. 
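+ *
+ * Each file under that directory holds one alias definition written in
+ * the same term syntax the event_config grammar accepts (for example
+ * "event=0x3c,umask=0x00"), parsed by perf_pmu__new_alias() via
+ * parse_events_terms().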
+ */ +static int pmu_aliases(char *name, struct list_head *head) +{ +	struct stat st; +	char path[PATH_MAX]; +	const char *sysfs; + +	sysfs = sysfs_find_mountpoint(); +	if (!sysfs) +		return -1; + +	snprintf(path, PATH_MAX, +		 "%s/bus/event_source/devices/%s/events", sysfs, name); + +	if (stat(path, &st) < 0) +		return -1; + +	if (pmu_aliases_parse(path, head)) +		return -1; + +	return 0; +} + +static int pmu_alias_terms(struct perf_pmu__alias *alias, +			   struct list_head *terms) +{ +	struct parse_events__term *term, *clone; +	LIST_HEAD(list); +	int ret; + +	list_for_each_entry(term, &alias->terms, list) { +		ret = parse_events__term_clone(&clone, term); +		if (ret) { +			parse_events__free_terms(&list); +			return ret; +		} +		list_add_tail(&clone->list, &list); +	} +	list_splice(&list, terms); +	return 0; +} +  /*   * Reading/parsing the default pmu type value, which should be   * located at: @@ -118,6 +226,7 @@ static struct perf_pmu *pmu_lookup(char *name)  {  	struct perf_pmu *pmu;  	LIST_HEAD(format); +	LIST_HEAD(aliases);  	__u32 type;  	/* @@ -135,10 +244,15 @@ static struct perf_pmu *pmu_lookup(char *name)  	if (!pmu)  		return NULL; +	pmu_aliases(name, &aliases); +  	INIT_LIST_HEAD(&pmu->format); +	INIT_LIST_HEAD(&pmu->aliases);  	list_splice(&format, &pmu->format); +	list_splice(&aliases, &pmu->aliases);  	pmu->name = strdup(name);  	pmu->type = type; +	list_add_tail(&pmu->list, &pmus);  	return pmu;  } @@ -279,6 +393,59 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,  	return pmu_config(&pmu->format, attr, head_terms);  } +static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, +					      struct parse_events__term *term) +{ +	struct perf_pmu__alias *alias; +	char *name; + +	if (parse_events__is_hardcoded_term(term)) +		return NULL; + +	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { +		if (term->val.num != 1) +			return NULL; +		if (pmu_find_format(&pmu->format, term->config)) +			return NULL; +		name = term->config; +	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { +		if (strcasecmp(term->config, "event")) +			return NULL; +		name = term->val.str; +	} else { +		return NULL; +	} + +	list_for_each_entry(alias, &pmu->aliases, list) { +		if (!strcasecmp(alias->name, name)) +			return alias; +	} +	return NULL; +} + +/* + * Find alias in the terms list and replace it with the terms + * defined for the alias + */ +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +{ +	struct parse_events__term *term, *h; +	struct perf_pmu__alias *alias; +	int ret; + +	list_for_each_entry_safe(term, h, head_terms, list) { +		alias = pmu_find_alias(pmu, term); +		if (!alias) +			continue; +		ret = pmu_alias_terms(alias, &term->list); +		if (ret) +			return ret; +		list_del(&term->list); +		free(term); +	} +	return 0; +} +  int perf_pmu__new_format(struct list_head *list, char *name,  			 int config, unsigned long *bits)  { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 68c0db965e1..535f2c5258a 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -19,17 +19,26 @@ struct perf_pmu__format {  	struct list_head list;  }; +struct perf_pmu__alias { +	char *name; +	struct list_head terms; +	struct list_head list; +}; +  struct perf_pmu {  	char *name;  	__u32 type;  	struct list_head format; +	struct list_head aliases;  	struct list_head list;  };  struct perf_pmu *perf_pmu__find(char *name);  int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,  		     struct list_head 
*head_terms); - +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +struct list_head *perf_pmu__alias(struct perf_pmu *pmu, +				struct list_head *head_terms);  int perf_pmu_wrap(void);  void perf_pmu_error(struct list_head *list, char *name, char const *msg); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 4c1b3d72a1d..02dfa19a467 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -209,6 +209,10 @@ static void define_event_symbols(struct event_format *event,  		define_symbolic_values(args->symbol.symbols, ev_name,  				       cur_field_name);  		break; +	case PRINT_HEX: +		define_event_symbols(event, ev_name, args->hex.field); +		define_event_symbols(event, ev_name, args->hex.size); +		break;  	case PRINT_BSTRING:  	case PRINT_DYNAMIC_ARRAY:  	case PRINT_STRING: @@ -233,7 +237,8 @@ static void define_event_symbols(struct event_format *event,  		define_event_symbols(event, ev_name, args->next);  } -static inline struct event_format *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type)  {  	static char ev_name[256];  	struct event_format *event; @@ -241,7 +246,7 @@ static inline struct event_format *find_cache_event(int type)  	if (events[type])  		return events[type]; -	events[type] = event = trace_find_event(type); +	events[type] = event = pevent_find_event(pevent, type);  	if (!event)  		return NULL; @@ -252,7 +257,8 @@ static inline struct event_format *find_cache_event(int type)  	return event;  } -static void perl_process_tracepoint(union perf_event *pevent __unused, +static void perl_process_tracepoint(union perf_event *perf_event __unused, +				    struct pevent *pevent,  				    struct perf_sample *sample,  				    struct perf_evsel *evsel,  				    struct machine *machine __unused, @@ -275,13 +281,13 @@ static void perl_process_tracepoint(union perf_event *pevent __unused,  	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)  		return; -	type = trace_parse_common_type(data); +	type = trace_parse_common_type(pevent, data); -	event = find_cache_event(type); +	event = find_cache_event(pevent, type);  	if (!event)  		die("ug! 
no event found for type %d", type); -	pid = trace_parse_common_pid(data); +	pid = trace_parse_common_pid(pevent, data);  	sprintf(handler, "%s::%s", event->system, event->name); @@ -314,7 +320,8 @@ static void perl_process_tracepoint(union perf_event *pevent __unused,  				offset = field->offset;  			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));  		} else { /* FIELD_IS_NUMERIC */ -			val = read_size(data + field->offset, field->size); +			val = read_size(pevent, data + field->offset, +					field->size);  			if (field->flags & FIELD_IS_SIGNED) {  				XPUSHs(sv_2mortal(newSViv(val)));  			} else { @@ -368,14 +375,15 @@ static void perl_process_event_generic(union perf_event *pevent __unused,  	LEAVE;  } -static void perl_process_event(union perf_event *pevent, +static void perl_process_event(union perf_event *event, +			       struct pevent *pevent,  			       struct perf_sample *sample,  			       struct perf_evsel *evsel,  			       struct machine *machine,  			       struct thread *thread)  { -	perl_process_tracepoint(pevent, sample, evsel, machine, thread); -	perl_process_event_generic(pevent, sample, evsel, machine, thread); +	perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); +	perl_process_event_generic(event, sample, evsel, machine, thread);  }  static void run_start_sub(void) @@ -448,7 +456,7 @@ static int perl_stop_script(void)  	return 0;  } -static int perl_generate_script(const char *outfile) +static int perl_generate_script(struct pevent *pevent, const char *outfile)  {  	struct event_format *event = NULL;  	struct format_field *f; @@ -495,7 +503,7 @@ static int perl_generate_script(const char *outfile)  	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");  	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); -	while ((event = trace_find_next_event(event))) { +	while ((event = trace_find_next_event(pevent, event))) {  		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);  		fprintf(ofp, "\tmy ("); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index acb9795286c..ce4d1b0c386 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -166,6 +166,10 @@ static void define_event_symbols(struct event_format *event,  		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,  			      cur_field_name);  		break; +	case PRINT_HEX: +		define_event_symbols(event, ev_name, args->hex.field); +		define_event_symbols(event, ev_name, args->hex.size); +		break;  	case PRINT_STRING:  		break;  	case PRINT_TYPE: @@ -190,7 +194,8 @@ static void define_event_symbols(struct event_format *event,  		define_event_symbols(event, ev_name, args->next);  } -static inline struct event_format *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type)  {  	static char ev_name[256];  	struct event_format *event; @@ -198,7 +203,7 @@ static inline struct event_format *find_cache_event(int type)  	if (events[type])  		return events[type]; -	events[type] = event = trace_find_event(type); +	events[type] = event = pevent_find_event(pevent, type);  	if (!event)  		return NULL; @@ -209,7 +214,8 @@ static inline struct event_format *find_cache_event(int type)  	return event;  } -static void python_process_event(union perf_event *pevent __unused, +static void python_process_event(union perf_event *perf_event __unused, +				 struct pevent *pevent,  				 struct 
perf_sample *sample,  				 struct perf_evsel *evsel __unused,  				 struct machine *machine __unused, @@ -233,13 +239,13 @@ static void python_process_event(union perf_event *pevent __unused,  	if (!t)  		Py_FatalError("couldn't create Python tuple"); -	type = trace_parse_common_type(data); +	type = trace_parse_common_type(pevent, data); -	event = find_cache_event(type); +	event = find_cache_event(pevent, type);  	if (!event)  		die("ug! no event found for type %d", type); -	pid = trace_parse_common_pid(data); +	pid = trace_parse_common_pid(pevent, data);  	sprintf(handler_name, "%s__%s", event->system, event->name); @@ -284,7 +290,8 @@ static void python_process_event(union perf_event *pevent __unused,  				offset = field->offset;  			obj = PyString_FromString((char *)data + offset);  		} else { /* FIELD_IS_NUMERIC */ -			val = read_size(data + field->offset, field->size); +			val = read_size(pevent, data + field->offset, +					field->size);  			if (field->flags & FIELD_IS_SIGNED) {  				if ((long long)val >= LONG_MIN &&  				    (long long)val <= LONG_MAX) @@ -438,7 +445,7 @@ out:  	return err;  } -static int python_generate_script(const char *outfile) +static int python_generate_script(struct pevent *pevent, const char *outfile)  {  	struct event_format *event = NULL;  	struct format_field *f; @@ -487,7 +494,7 @@ static int python_generate_script(const char *outfile)  	fprintf(ofp, "def trace_end():\n");  	fprintf(ofp, "\tprint \"in trace_end\"\n\n"); -	while ((event = trace_find_next_event(event))) { +	while ((event = trace_find_next_event(pevent, event))) {  		fprintf(ofp, "def %s__%s(", event->system, event->name);  		fprintf(ofp, "event_name, ");  		fprintf(ofp, "context, "); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 56142d0fb8d..8e485592ca2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -14,6 +14,7 @@  #include "sort.h"  #include "util.h"  #include "cpumap.h" +#include "event-parse.h"  static int perf_session__open(struct perf_session *self, bool force)  { @@ -289,7 +290,6 @@ struct branch_info *machine__resolve_bstack(struct machine *self,  }  int machine__resolve_callchain(struct machine *self, -			       struct perf_evsel *evsel __used,  			       struct thread *thread,  			       struct ip_callchain *chain,  			       struct symbol **parent) @@ -1449,7 +1449,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)  	ret += hists__fprintf_nr_events(&session->hists, fp);  	list_for_each_entry(pos, &session->evlist->entries, node) { -		ret += fprintf(fp, "%s stats:\n", event_name(pos)); +		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));  		ret += hists__fprintf_nr_events(&pos->hists, fp);  	} @@ -1490,8 +1490,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,  }  void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, -			  struct machine *machine, struct perf_evsel *evsel, -			  int print_sym, int print_dso, int print_symoffset) +			  struct machine *machine, int print_sym, +			  int print_dso, int print_symoffset)  {  	struct addr_location al;  	struct callchain_cursor_node *node; @@ -1505,7 +1505,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,  	if (symbol_conf.use_callchain && sample->callchain) { -		if (machine__resolve_callchain(machine, evsel, al.thread, +		if (machine__resolve_callchain(machine, al.thread,  						sample->callchain, NULL) != 0) {  			if (verbose)  				error("Failed to 
resolve callchain. Skipping\n"); @@ -1611,3 +1611,58 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp,  	perf_header__fprintf_info(session, fp, full);  	fprintf(fp, "# ========\n#\n");  } + + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, +					     const struct perf_evsel_str_handler *assocs, +					     size_t nr_assocs) +{ +	struct perf_evlist *evlist = session->evlist; +	struct event_format *format; +	struct perf_evsel *evsel; +	char *tracepoint, *name; +	size_t i; +	int err; + +	for (i = 0; i < nr_assocs; i++) { +		err = -ENOMEM; +		tracepoint = strdup(assocs[i].name); +		if (tracepoint == NULL) +			goto out; + +		err = -ENOENT; +		name = strchr(tracepoint, ':'); +		if (name == NULL) +			goto out_free; + +		*name++ = '\0'; +		format = pevent_find_event_by_name(session->pevent, +						   tracepoint, name); +		if (format == NULL) { +			/* +			 * Adding a handler for an event not in the session, +			 * just ignore it. +			 */ +			goto next; +		} + +		evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id); +		if (evsel == NULL) +			goto next; + +		err = -EEXIST; +		if (evsel->handler.func != NULL) +			goto out_free; +		evsel->handler.func = assocs[i].handler; +next: +		free(tracepoint); +	} + +	err = 0; +out: +	return err; + +out_free: +	free(tracepoint); +	goto out; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 0c702e3f0a3..7c435bde6eb 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -33,6 +33,7 @@ struct perf_session {  	struct machine		host_machine;  	struct rb_root		machines;  	struct perf_evlist	*evlist; +	struct pevent		*pevent;  	/*  	 * FIXME: Need to split this up further, we need global  	 *	  stats + per event stats. 'perf diff' also needs @@ -151,11 +152,20 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,  					    unsigned int type);  void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, -			  struct machine *machine, struct perf_evsel *evsel, -			  int print_sym, int print_dso, int print_symoffset); +			  struct machine *machine, int print_sym, +			  int print_dso, int print_symoffset);  int perf_session__cpu_bitmap(struct perf_session *session,  			     const char *cpu_list, unsigned long *cpu_bitmap);  void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); + +struct perf_evsel_str_handler; + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, +					     const struct perf_evsel_str_handler *assocs, +					     size_t nr_assocs); + +#define perf_session__set_tracepoints_handlers(session, array) \ +	__perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))  #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a27237430c5..0f5a0a496bc 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -241,6 +241,54 @@ struct sort_entry sort_sym = {  	.se_width_idx	= HISTC_SYMBOL,  }; +/* --sort srcline */ + +static int64_t +sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) +{ +	return (int64_t)(right->ip - left->ip); +} + +static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, +				   size_t size, unsigned int width __used) +{ +	FILE *fp; +	char cmd[PATH_MAX + 2], *path = self->srcline, *nl; +	size_t line_len; + +	if (path != NULL) +		goto out_path; + +	snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64, +		 self->ms.map->dso->long_name, self->ip); +	fp 
= popen(cmd, "r"); +	if (!fp) +		goto out_ip; + +	if (getline(&path, &line_len, fp) < 0 || !line_len) +		goto out_ip; +	fclose(fp); +	self->srcline = strdup(path); +	if (self->srcline == NULL) +		goto out_ip; + +	nl = strchr(self->srcline, '\n'); +	if (nl != NULL) +		*nl = '\0'; +	path = self->srcline; +out_path: +	return repsep_snprintf(bf, size, "%s", path); +out_ip: +	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); +} + +struct sort_entry sort_srcline = { +	.se_header	= "Source:Line", +	.se_cmp		= sort__srcline_cmp, +	.se_snprintf	= hist_entry__srcline_snprintf, +	.se_width_idx	= HISTC_SRCLINE, +}; +  /* --sort parent */  static int64_t @@ -439,6 +487,7 @@ static struct sort_dimension sort_dimensions[] = {  	DIM(SORT_PARENT, "parent", sort_parent),  	DIM(SORT_CPU, "cpu", sort_cpu),  	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), +	DIM(SORT_SRCLINE, "srcline", sort_srcline),  };  int sort_dimension__add(const char *tok) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 472aa5a63a5..e724b26acd5 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -71,6 +71,7 @@ struct hist_entry {  	char			level;  	bool			used;  	u8			filtered; +	char			*srcline;  	struct symbol		*parent;  	union {  		unsigned long	  position; @@ -93,6 +94,7 @@ enum sort_type {  	SORT_SYM_FROM,  	SORT_SYM_TO,  	SORT_MISPREDICT, +	SORT_SRCLINE,  };  /* diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index d5836382ff2..199bc4d8905 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -313,3 +313,25 @@ int strtailcmp(const char *s1, const char *s2)  	return 0;  } +/** + * rtrim - Removes trailing whitespace from @s. + * @s: The string to be stripped. + * + * Note that the first trailing whitespace is replaced with a %NUL-terminator + * in the given string @s. Returns @s. 
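+ *
+ * Example: a buffer holding "cycles  \n" is trimmed in place to
+ * "cycles" and the original pointer is returned.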
+ */ +char *rtrim(char *s) +{ +	size_t size = strlen(s); +	char *end; + +	if (!size) +		return s; + +	end = s + size - 1; +	while (end >= s && isspace(*end)) +		end--; +	*(end + 1) = '\0'; + +	return s; +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3e2e5ea0f03..50958bbeb26 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1478,14 +1478,31 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size)  		goto out;  	} -	sec = elf_section_by_name(elf, &ehdr, &shdr, -				  ".note.gnu.build-id", NULL); -	if (sec == NULL) { +	/* +	 * Check following sections for notes: +	 *   '.note.gnu.build-id' +	 *   '.notes' +	 *   '.note' (VDSO specific) +	 */ +	do { +		sec = elf_section_by_name(elf, &ehdr, &shdr, +					  ".note.gnu.build-id", NULL); +		if (sec) +			break; +  		sec = elf_section_by_name(elf, &ehdr, &shdr,  					  ".notes", NULL); -		if (sec == NULL) -			goto out; -	} +		if (sec) +			break; + +		sec = elf_section_by_name(elf, &ehdr, &shdr, +					  ".note", NULL); +		if (sec) +			break; + +		return err; + +	} while (0);  	data = elf_getdata(sec, NULL);  	if (data == NULL) @@ -1590,11 +1607,62 @@ out:  	return err;  } +static int filename__read_debuglink(const char *filename, +				    char *debuglink, size_t size) +{ +	int fd, err = -1; +	Elf *elf; +	GElf_Ehdr ehdr; +	GElf_Shdr shdr; +	Elf_Data *data; +	Elf_Scn *sec; +	Elf_Kind ek; + +	fd = open(filename, O_RDONLY); +	if (fd < 0) +		goto out; + +	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); +	if (elf == NULL) { +		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); +		goto out_close; +	} + +	ek = elf_kind(elf); +	if (ek != ELF_K_ELF) +		goto out_close; + +	if (gelf_getehdr(elf, &ehdr) == NULL) { +		pr_err("%s: cannot get elf header.\n", __func__); +		goto out_close; +	} + +	sec = elf_section_by_name(elf, &ehdr, &shdr, +				  ".gnu_debuglink", NULL); +	if (sec == NULL) +		goto out_close; + +	data = elf_getdata(sec, NULL); +	if (data == NULL) +		goto out_close; + +	/* the start of this section is a zero-terminated string */ +	strncpy(debuglink, data->d_buf, size); + +	elf_end(elf); + +out_close: +	close(fd); +out: +	return err; +} +  char dso__symtab_origin(const struct dso *dso)  {  	static const char origin[] = {  		[SYMTAB__KALLSYMS]	      = 'k',  		[SYMTAB__JAVA_JIT]	      = 'j', +		[SYMTAB__DEBUGLINK]           = 'l',  		[SYMTAB__BUILD_ID_CACHE]      = 'B',  		[SYMTAB__FEDORA_DEBUGINFO]    = 'f',  		[SYMTAB__UBUNTU_DEBUGINFO]    = 'u', @@ -1662,10 +1730,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)  	 */  	want_symtab = 1;  restart: -	for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE; +	for (dso->symtab_type = SYMTAB__DEBUGLINK;  	     dso->symtab_type != SYMTAB__NOT_FOUND;  	     dso->symtab_type++) {  		switch (dso->symtab_type) { +		case SYMTAB__DEBUGLINK: { +			char *debuglink; +			strncpy(name, dso->long_name, size); +			debuglink = name + dso->long_name_len; +			while (debuglink != name && *debuglink != '/') +				debuglink--; +			if (*debuglink == '/') +				debuglink++; +			filename__read_debuglink(dso->long_name, debuglink, +						 size - (debuglink - name)); +			} +			break;  		case SYMTAB__BUILD_ID_CACHE:  			/* skip the locally configured cache if a symfs is given */  			if (symbol_conf.symfs[0] || diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af0752b1aca..a884b99017f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -257,6 +257,7 @@ enum symtab_type {  	SYMTAB__KALLSYMS = 0,  	
SYMTAB__GUEST_KALLSYMS,  	SYMTAB__JAVA_JIT, +	SYMTAB__DEBUGLINK,  	SYMTAB__BUILD_ID_CACHE,  	SYMTAB__FEDORA_DEBUGINFO,  	SYMTAB__UBUNTU_DEBUGINFO, diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index abe0e8e9506..7eeebcee291 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -65,7 +65,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)  				top->freq ? "Hz" : "");  	} -	ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel)); +	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));  	ret += SNPRINTF(bf + ret, size - ret, "], "); diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 5dd3b5ec841..0715c843c2e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -32,29 +32,25 @@ int header_page_size_size;  int header_page_ts_size;  int header_page_data_offset; -struct pevent *perf_pevent; -static struct pevent *pevent; -  bool latency_format; -int read_trace_init(int file_bigendian, int host_bigendian) +struct pevent *read_trace_init(int file_bigendian, int host_bigendian)  { -	if (pevent) -		return 0; - -	perf_pevent = pevent_alloc(); -	pevent = perf_pevent; +	struct pevent *pevent = pevent_alloc(); -	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); -	pevent_set_file_bigendian(pevent, file_bigendian); -	pevent_set_host_bigendian(pevent, host_bigendian); +	if (pevent != NULL) { +		pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); +		pevent_set_file_bigendian(pevent, file_bigendian); +		pevent_set_host_bigendian(pevent, host_bigendian); +	} -	return 0; +	return pevent;  }  static int get_common_field(struct scripting_context *context,  			    int *offset, int *size, const char *type)  { +	struct pevent *pevent = context->pevent;  	struct event_format *event;  	struct format_field *field; @@ -150,7 +146,7 @@ void *raw_field_ptr(struct event_format *event, const char *name, void *data)  	return data + field->offset;  } -int trace_parse_common_type(void *data) +int trace_parse_common_type(struct pevent *pevent, void *data)  {  	struct pevent_record record; @@ -158,7 +154,7 @@ int trace_parse_common_type(void *data)  	return pevent_data_type(pevent, &record);  } -int trace_parse_common_pid(void *data) +int trace_parse_common_pid(struct pevent *pevent, void *data)  {  	struct pevent_record record; @@ -166,27 +162,21 @@ int trace_parse_common_pid(void *data)  	return pevent_data_pid(pevent, &record);  } -unsigned long long read_size(void *ptr, int size) +unsigned long long read_size(struct pevent *pevent, void *ptr, int size)  {  	return pevent_read_number(pevent, ptr, size);  } -struct event_format *trace_find_event(int type) -{ -	return pevent_find_event(pevent, type); -} - - -void print_trace_event(int cpu, void *data, int size) +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size)  {  	struct event_format *event;  	struct pevent_record record;  	struct trace_seq s;  	int type; -	type = trace_parse_common_type(data); +	type = trace_parse_common_type(pevent, data); -	event = trace_find_event(type); +	event = pevent_find_event(pevent, type);  	if (!event) {  		warning("ug! 
no event found for type %d", type);  		return; @@ -202,8 +192,8 @@ void print_trace_event(int cpu, void *data, int size)  	trace_seq_do_printf(&s);  } -void print_event(int cpu, void *data, int size, unsigned long long nsecs, -		  char *comm) +void print_event(struct pevent *pevent, int cpu, void *data, int size, +		 unsigned long long nsecs, char *comm)  {  	struct pevent_record record;  	struct trace_seq s; @@ -226,7 +216,8 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,  	printf("\n");  } -void parse_proc_kallsyms(char *file, unsigned int size __unused) +void parse_proc_kallsyms(struct pevent *pevent, +			 char *file, unsigned int size __unused)  {  	unsigned long long addr;  	char *func; @@ -257,7 +248,8 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)  	}  } -void parse_ftrace_printk(char *file, unsigned int size __unused) +void parse_ftrace_printk(struct pevent *pevent, +			 char *file, unsigned int size __unused)  {  	unsigned long long addr;  	char *printk; @@ -281,17 +273,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused)  	}  } -int parse_ftrace_file(char *buf, unsigned long size) +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size)  {  	return pevent_parse_event(pevent, buf, size, "ftrace");  } -int parse_event_file(char *buf, unsigned long size, char *sys) +int parse_event_file(struct pevent *pevent, +		     char *buf, unsigned long size, char *sys)  {  	return pevent_parse_event(pevent, buf, size, sys);  } -struct event_format *trace_find_next_event(struct event_format *event) +struct event_format *trace_find_next_event(struct pevent *pevent, +					   struct event_format *event)  {  	static int idx; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f097e0dd6c5..719ed74a856 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -114,20 +114,20 @@ static void skip(int size)  	};  } -static unsigned int read4(void) +static unsigned int read4(struct pevent *pevent)  {  	unsigned int data;  	read_or_die(&data, 4); -	return __data2host4(perf_pevent, data); +	return __data2host4(pevent, data);  } -static unsigned long long read8(void) +static unsigned long long read8(struct pevent *pevent)  {  	unsigned long long data;  	read_or_die(&data, 8); -	return __data2host8(perf_pevent, data); +	return __data2host8(pevent, data);  }  static char *read_string(void) @@ -168,12 +168,12 @@ static char *read_string(void)  	return str;  } -static void read_proc_kallsyms(void) +static void read_proc_kallsyms(struct pevent *pevent)  {  	unsigned int size;  	char *buf; -	size = read4(); +	size = read4(pevent);  	if (!size)  		return; @@ -181,29 +181,29 @@ static void read_proc_kallsyms(void)  	read_or_die(buf, size);  	buf[size] = '\0'; -	parse_proc_kallsyms(buf, size); +	parse_proc_kallsyms(pevent, buf, size);  	free(buf);  } -static void read_ftrace_printk(void) +static void read_ftrace_printk(struct pevent *pevent)  {  	unsigned int size;  	char *buf; -	size = read4(); +	size = read4(pevent);  	if (!size)  		return;  	buf = malloc_or_die(size);  	read_or_die(buf, size); -	parse_ftrace_printk(buf, size); +	parse_ftrace_printk(pevent, buf, size);  	free(buf);  } -static void read_header_files(void) +static void read_header_files(struct pevent *pevent)  {  	unsigned long long size;  	char *header_event; @@ -214,7 +214,7 @@ static void read_header_files(void)  	if (memcmp(buf, "header_page", 12) != 0)  		die("did not read header 
page"); -	size = read8(); +	size = read8(pevent);  	skip(size);  	/* @@ -227,47 +227,48 @@ static void read_header_files(void)  	if (memcmp(buf, "header_event", 13) != 0)  		die("did not read header event"); -	size = read8(); +	size = read8(pevent);  	header_event = malloc_or_die(size);  	read_or_die(header_event, size);  	free(header_event);  } -static void read_ftrace_file(unsigned long long size) +static void read_ftrace_file(struct pevent *pevent, unsigned long long size)  {  	char *buf;  	buf = malloc_or_die(size);  	read_or_die(buf, size); -	parse_ftrace_file(buf, size); +	parse_ftrace_file(pevent, buf, size);  	free(buf);  } -static void read_event_file(char *sys, unsigned long long size) +static void read_event_file(struct pevent *pevent, char *sys, +			    unsigned long long size)  {  	char *buf;  	buf = malloc_or_die(size);  	read_or_die(buf, size); -	parse_event_file(buf, size, sys); +	parse_event_file(pevent, buf, size, sys);  	free(buf);  } -static void read_ftrace_files(void) +static void read_ftrace_files(struct pevent *pevent)  {  	unsigned long long size;  	int count;  	int i; -	count = read4(); +	count = read4(pevent);  	for (i = 0; i < count; i++) { -		size = read8(); -		read_ftrace_file(size); +		size = read8(pevent); +		read_ftrace_file(pevent, size);  	}  } -static void read_event_files(void) +static void read_event_files(struct pevent *pevent)  {  	unsigned long long size;  	char *sys; @@ -275,15 +276,15 @@ static void read_event_files(void)  	int count;  	int i,x; -	systems = read4(); +	systems = read4(pevent);  	for (i = 0; i < systems; i++) {  		sys = read_string(); -		count = read4(); +		count = read4(pevent);  		for (x=0; x < count; x++) { -			size = read8(); -			read_event_file(sys, size); +			size = read8(pevent); +			read_event_file(pevent, sys, size);  		}  	}  } @@ -377,7 +378,7 @@ static int calc_index(void *ptr, int cpu)  	return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page;  } -struct pevent_record *trace_peek_data(int cpu) +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu)  {  	struct pevent_record *data;  	void *page = cpu_data[cpu].page; @@ -399,15 +400,15 @@ struct pevent_record *trace_peek_data(int cpu)  		/* FIXME: handle header page */  		if (header_page_ts_size != 8)  			die("expected a long long type for timestamp"); -		cpu_data[cpu].timestamp = data2host8(perf_pevent, ptr); +		cpu_data[cpu].timestamp = data2host8(pevent, ptr);  		ptr += 8;  		switch (header_page_size_size) {  		case 4: -			cpu_data[cpu].page_size = data2host4(perf_pevent, ptr); +			cpu_data[cpu].page_size = data2host4(pevent, ptr);  			ptr += 4;  			break;  		case 8: -			cpu_data[cpu].page_size = data2host8(perf_pevent, ptr); +			cpu_data[cpu].page_size = data2host8(pevent, ptr);  			ptr += 8;  			break;  		default: @@ -421,10 +422,10 @@ read_again:  	if (idx >= cpu_data[cpu].page_size) {  		get_next_page(cpu); -		return trace_peek_data(cpu); +		return trace_peek_data(pevent, cpu);  	} -	type_len_ts = data2host4(perf_pevent, ptr); +	type_len_ts = data2host4(pevent, ptr);  	ptr += 4;  	type_len = type_len4host(type_len_ts); @@ -434,14 +435,14 @@ read_again:  	case RINGBUF_TYPE_PADDING:  		if (!delta)  			die("error, hit unexpected end of page"); -		length = data2host4(perf_pevent, ptr); +		length = data2host4(pevent, ptr);  		ptr += 4;  		length *= 4;  		ptr += length;  		goto read_again;  	case RINGBUF_TYPE_TIME_EXTEND: -		extend = data2host4(perf_pevent, ptr); +		extend = data2host4(pevent, ptr);  		ptr += 4;  		extend <<= TS_SHIFT;  		extend += 
delta; @@ -452,7 +453,7 @@ read_again:  		ptr += 12;  		break;  	case 0: -		length = data2host4(perf_pevent, ptr); +		length = data2host4(pevent, ptr);  		ptr += 4;  		die("here! length=%d", length);  		break; @@ -477,17 +478,17 @@ read_again:  	return data;  } -struct pevent_record *trace_read_data(int cpu) +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu)  {  	struct pevent_record *data; -	data = trace_peek_data(cpu); +	data = trace_peek_data(pevent, cpu);  	cpu_data[cpu].next = NULL;  	return data;  } -ssize_t trace_report(int fd, bool __repipe) +ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)  {  	char buf[BUFSIZ];  	char test[] = { 23, 8, 68 }; @@ -519,30 +520,32 @@ ssize_t trace_report(int fd, bool __repipe)  	file_bigendian = buf[0];  	host_bigendian = bigendian(); -	read_trace_init(file_bigendian, host_bigendian); +	*ppevent = read_trace_init(file_bigendian, host_bigendian); +	if (*ppevent == NULL) +		die("read_trace_init failed");  	read_or_die(buf, 1);  	long_size = buf[0]; -	page_size = read4(); +	page_size = read4(*ppevent); -	read_header_files(); +	read_header_files(*ppevent); -	read_ftrace_files(); -	read_event_files(); -	read_proc_kallsyms(); -	read_ftrace_printk(); +	read_ftrace_files(*ppevent); +	read_event_files(*ppevent); +	read_proc_kallsyms(*ppevent); +	read_ftrace_printk(*ppevent);  	size = calc_data_size - 1;  	calc_data_size = 0;  	repipe = false;  	if (show_funcs) { -		pevent_print_funcs(perf_pevent); +		pevent_print_funcs(*ppevent);  		return size;  	}  	if (show_printk) { -		pevent_print_printk(perf_pevent); +		pevent_print_printk(*ppevent);  		return size;  	} diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 18ae6c1831d..474aa7a7df4 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -36,6 +36,7 @@ static int stop_script_unsupported(void)  }  static void process_event_unsupported(union perf_event *event __unused, +				      struct pevent *pevent __unused,  				      struct perf_sample *sample __unused,  				      struct perf_evsel *evsel __unused,  				      struct machine *machine __unused, @@ -61,7 +62,8 @@ static int python_start_script_unsupported(const char *script __unused,  	return -1;  } -static int python_generate_script_unsupported(const char *outfile __unused) +static int python_generate_script_unsupported(struct pevent *pevent __unused, +					      const char *outfile __unused)  {  	print_python_unsupported_msg(); @@ -122,7 +124,8 @@ static int perl_start_script_unsupported(const char *script __unused,  	return -1;  } -static int perl_generate_script_unsupported(const char *outfile __unused) +static int perl_generate_script_unsupported(struct pevent *pevent __unused, +					    const char *outfile __unused)  {  	print_perl_unsupported_msg(); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 639852ac111..8fef1d6687b 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -8,6 +8,7 @@  struct machine;  struct perf_sample;  union perf_event; +struct perf_tool;  struct thread;  extern int header_page_size_size; @@ -29,35 +30,36 @@ enum {  int bigendian(void); -int read_trace_init(int file_bigendian, int host_bigendian); -void print_trace_event(int cpu, void *data, int size); +struct pevent *read_trace_init(int file_bigendian, int host_bigendian); +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); -void print_event(int cpu, void 
*data, int size, unsigned long long nsecs, -		  char *comm); +void print_event(struct pevent *pevent, int cpu, void *data, int size, +		 unsigned long long nsecs, char *comm); -int parse_ftrace_file(char *buf, unsigned long size); -int parse_event_file(char *buf, unsigned long size, char *sys); +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size); +int parse_event_file(struct pevent *pevent, +		     char *buf, unsigned long size, char *sys); -struct pevent_record *trace_peek_data(int cpu); -struct event_format *trace_find_event(int type); +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu);  unsigned long long  raw_field_value(struct event_format *event, const char *name, void *data);  void *raw_field_ptr(struct event_format *event, const char *name, void *data); -void parse_proc_kallsyms(char *file, unsigned int size __unused); -void parse_ftrace_printk(char *file, unsigned int size __unused); +void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); +void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); -ssize_t trace_report(int fd, bool repipe); +ssize_t trace_report(int fd, struct pevent **pevent, bool repipe); -int trace_parse_common_type(void *data); -int trace_parse_common_pid(void *data); +int trace_parse_common_type(struct pevent *pevent, void *data); +int trace_parse_common_pid(struct pevent *pevent, void *data); -struct event_format *trace_find_next_event(struct event_format *event); -unsigned long long read_size(void *ptr, int size); +struct event_format *trace_find_next_event(struct pevent *pevent, +					   struct event_format *event); +unsigned long long read_size(struct pevent *pevent, void *ptr, int size);  unsigned long long eval_flag(const char *flag); -struct pevent_record *trace_read_data(int cpu); +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu);  int read_tracing_data(int fd, struct list_head *pattrs);  struct tracing_data { @@ -77,11 +79,12 @@ struct scripting_ops {  	int (*start_script) (const char *script, int argc, const char **argv);  	int (*stop_script) (void);  	void (*process_event) (union perf_event *event, +			       struct pevent *pevent,  			       struct perf_sample *sample,  			       struct perf_evsel *evsel,  			       struct machine *machine,  			       struct thread *thread); -	int (*generate_script) (const char *outfile); +	int (*generate_script) (struct pevent *pevent, const char *outfile);  };  int script_spec_register(const char *spec, struct scripting_ops *ops); @@ -90,6 +93,7 @@ void setup_perl_scripting(void);  void setup_python_scripting(void);  struct scripting_context { +	struct pevent *pevent;  	void *event_data;  }; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 2daaedb83d8..b13c7331eaf 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -264,4 +264,6 @@ bool is_power_of_2(unsigned long n)  size_t hex_width(u64 v); +char *rtrim(char *s); +  #endif
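
The new rtrim() helper, whose tail opens this section and whose declaration lands at the bottom of util.h in the final hunk, chops trailing whitespace in place and returns its argument. A minimal usage sketch (the function body is copied from the patch; the input string is made up):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Body as added to tools/perf/util/string.c by this patch. */
char *rtrim(char *s)
{
	size_t size = strlen(s);
	char *end;

	if (!size)
		return s;

	end = s + size - 1;
	while (end >= s && isspace(*end))
		end--;
	*(end + 1) = '\0';

	return s;
}

int main(void)
{
	char buf[] = "cycles:u \t \n";

	printf("[%s]\n", rtrim(buf));	/* prints [cycles:u] */
	return 0;
}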
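
elf_read_build_id() now probes three note-section names in turn, wrapped in a do { ... } while (0) so each hit can break out and a miss falls through to return err. A table-driven sketch of the same cascade, where section_exists() and its hard-coded match are stand-ins for perf's elf_section_by_name() against a real ELF:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for elf_section_by_name(); pretend we are looking at a
 * VDSO image, where only ".note" exists. */
static int section_exists(const char *name)
{
	return strcmp(name, ".note") == 0;
}

static const char *find_note_section(void)
{
	static const char * const candidates[] = {
		".note.gnu.build-id",	/* regular toolchain output */
		".notes",		/* kernel images */
		".note",		/* VDSO specific */
	};
	size_t i;

	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		if (section_exists(candidates[i]))
			return candidates[i];

	return NULL;	/* mirrors the patch's 'return err' fall-through */
}

int main(void)
{
	const char *sec = find_note_section();

	printf("note section: %s\n", sec ? sec : "(none)");
	return 0;
}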
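
The SYMTAB__DEBUGLINK case added to dso__load() copies dso->long_name into the lookup buffer, backs up to just past the final '/', and lets filename__read_debuglink() overwrite the basename with the name stored in the binary's .gnu_debuglink section, so the debug file is sought in the binary's own directory. A standalone sketch of that pointer walk (the paths, buffer size, and pasted-in link name are hypothetical):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Hypothetical stand-ins for dso->long_name and the string
	 * filename__read_debuglink() would read from .gnu_debuglink. */
	const char *long_name = "/usr/lib/libfoo.so";
	const char *from_debuglink = "libfoo.so.debug";

	char name[4096];
	size_t long_name_len = strlen(long_name);
	char *debuglink;

	strncpy(name, long_name, sizeof(name) - 1);
	name[sizeof(name) - 1] = '\0';

	/* Same walk as in the patch: back up from the end of the
	 * copied path until just past the last '/'. */
	debuglink = name + long_name_len;
	while (debuglink != name && *debuglink != '/')
		debuglink--;
	if (*debuglink == '/')
		debuglink++;

	/* The real code hands this tail (and the space left in the
	 * buffer) to filename__read_debuglink(); paste it in here. */
	snprintf(debuglink, sizeof(name) - (debuglink - name), "%s",
		 from_debuglink);

	printf("%s\n", name);	/* /usr/lib/libfoo.so.debug */
	return 0;
}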
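
One caveat in filename__read_debuglink() as merged: err starts at -1 and is never set to 0 on the success path, so the function always reports failure, and strncpy() can leave debuglink unterminated when the section string fills the buffer. dso__load() ignores the return value, so the lookup still works in practice. A sketch of the copy step with both points repaired (read_debuglink_name() is a hypothetical stand-in, not the patch's code):

#include <stdio.h>
#include <string.h>

/* Copy the leading NUL-terminated name out of a .gnu_debuglink-style
 * payload (name, padding, then a CRC32 the patch does not read). */
static int read_debuglink_name(const void *d_buf, char *debuglink,
			       size_t size)
{
	int err = -1;

	if (size == 0)
		return err;

	/* the start of this section is a zero-terminated string */
	strncpy(debuglink, d_buf, size);
	debuglink[size - 1] = '\0';	/* strncpy may not terminate */
	err = 0;			/* the merged code never does this */

	return err;
}

int main(void)
{
	const char section[] = "libfoo.so.debug\0\0\0\0XXXX";
	char name[64];

	if (read_debuglink_name(section, name, sizeof(name)) == 0)
		printf("debug file: %s\n", name);
	return 0;
}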
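
The remainder of the patch deletes the perf_pevent/pevent file-scope globals from trace-event-parse.c and threads a struct pevent * through every parser, reader, and scripting hook; read_trace_init() now returns the handle (NULL on allocation failure) and trace_report() hands it back through an out-parameter. A compile-alone sketch of the new calling convention; the struct body and initializer here are stubs, whereas the real code uses pevent_alloc() and the pevent_set_*() helpers from libtraceevent:

#include <stdio.h>
#include <stdlib.h>

/* Stub for libtraceevent's handle; the real struct is opaque here. */
struct pevent {
	int file_bigendian;
	int host_bigendian;
};

/* Shape of the reworked initializer: allocate and configure a handle,
 * return NULL on failure instead of caching a global. */
static struct pevent *read_trace_init(int file_bigendian, int host_bigendian)
{
	struct pevent *pevent = calloc(1, sizeof(*pevent));

	if (pevent != NULL) {
		pevent->file_bigendian = file_bigendian;
		pevent->host_bigendian = host_bigendian;
	}

	return pevent;
}

int main(void)
{
	struct pevent *pevent = read_trace_init(0, 1);

	if (pevent == NULL) {
		fprintf(stderr, "read_trace_init failed\n");
		return 1;
	}

	/*
	 * Every helper that used to consult the global now takes the
	 * handle explicitly, e.g.:
	 *   type   = trace_parse_common_type(pevent, data);
	 *   record = trace_read_data(pevent, cpu);
	 */
	free(pevent);
	return 0;
}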