29 files changed, 1264 insertions, 80 deletions
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index fbb519828aa..09d5f7fd9db 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -77,6 +77,7 @@ DEFINE_GUEST_HANDLE(int);  DEFINE_GUEST_HANDLE(long);  DEFINE_GUEST_HANDLE(void);  DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t);  typedef unsigned long xen_pfn_t;  DEFINE_GUEST_HANDLE(xen_pfn_t); diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index a1f2db5f117..cbf0c9d50b9 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int);  DEFINE_GUEST_HANDLE(long);  DEFINE_GUEST_HANDLE(void);  DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t);  #endif  #ifndef HYPERVISOR_VIRT_START diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d99346ea8fd..7415aa92791 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)  out:  	return ret;  } + +static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) +{ +	int ret = 0; + +	if (pci_seg_supported) { +		struct physdev_pci_device restore_ext; + +		restore_ext.seg = pci_domain_nr(dev->bus); +		restore_ext.bus = dev->bus->number; +		restore_ext.devfn = dev->devfn; +		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext, +					&restore_ext); +		if (ret == -ENOSYS) +			pci_seg_supported = false; +		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret); +	} +	if (!pci_seg_supported) { +		struct physdev_restore_msi restore; + +		restore.bus = dev->bus->number; +		restore.devfn = dev->devfn; +		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore); +		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret); +	} +}  #endif  static void xen_teardown_msi_irqs(struct pci_dev *dev) @@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)  #ifdef CONFIG_PCI_MSI  	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;  	x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;  #endif  	xen_setup_acpi_sci();  	__acpi_register_gsi = acpi_register_gsi_xen; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4172af8ceeb..b132ade26f7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,15 @@  #include <asm/reboot.h>  #include <asm/stackprotector.h>  #include <asm/hypervisor.h> +#include <asm/mwait.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#include <asm/acpi.h> +#include <acpi/pdc_intel.h> +#include <acpi/processor.h> +#include <xen/interface/platform.h> +#endif  #include "xen-ops.h"  #include "mmu.h" @@ -200,13 +209,17 @@ static void __init xen_banner(void)  static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;  static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; +static __read_mostly unsigned int cpuid_leaf5_ecx_val; +static __read_mostly unsigned int cpuid_leaf5_edx_val; +  static void xen_cpuid(unsigned int *ax, unsigned int *bx,  		      unsigned int *cx, unsigned int *dx)  {  	unsigned maskebx = ~0;  	unsigned maskecx = ~0;  	unsigned maskedx = ~0; - +	unsigned setecx = 0;  	/*  	 * Mask out inconvenient features, to try and disable as many  	 * unsupported kernel subsystems as possible. 
@@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,  	switch (*ax) {  	case 1:  		maskecx = cpuid_leaf1_ecx_mask; +		setecx = cpuid_leaf1_ecx_set_mask;  		maskedx = cpuid_leaf1_edx_mask;  		break; +	case CPUID_MWAIT_LEAF: +		/* Synthesize the values.. */ +		*ax = 0; +		*bx = 0; +		*cx = cpuid_leaf5_ecx_val; +		*dx = cpuid_leaf5_edx_val; +		return; +  	case 0xb:  		/* Suppress extended topology stuff */  		maskebx = 0; @@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,  	*bx &= maskebx;  	*cx &= maskecx; +	*cx |= setecx;  	*dx &= maskedx; +  } +static bool __init xen_check_mwait(void) +{ +#ifdef CONFIG_ACPI +	struct xen_platform_op op = { +		.cmd			= XENPF_set_processor_pminfo, +		.u.set_pminfo.id	= -1, +		.u.set_pminfo.type	= XEN_PM_PDC, +	}; +	uint32_t buf[3]; +	unsigned int ax, bx, cx, dx; +	unsigned int mwait_mask; + +	/* We need to determine whether it is OK to expose the MWAIT +	 * capability to the kernel to harvest deeper than C3 states from ACPI +	 * _CST using the processor_harvest_xen.c module. For this to work, we +	 * need to gather the MWAIT_LEAF values (which the cstate.c code +	 * checks against). The hypervisor won't expose the MWAIT flag because +	 * it would break backwards compatibility; so we will find out directly +	 * from the hardware and hypercall. +	 */ +	if (!xen_initial_domain()) +		return false; + +	ax = 1; +	cx = 0; + +	native_cpuid(&ax, &bx, &cx, &dx); + +	mwait_mask = (1 << (X86_FEATURE_EST % 32)) | +		     (1 << (X86_FEATURE_MWAIT % 32)); + +	if ((cx & mwait_mask) != mwait_mask) +		return false; + +	/* We need to emulate the MWAIT_LEAF and for that we need both +	 * ecx and edx. The hypercall provides only partial information. +	 */ + +	ax = CPUID_MWAIT_LEAF; +	bx = 0; +	cx = 0; +	dx = 0; + +	native_cpuid(&ax, &bx, &cx, &dx); + +	/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, +	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. +	 */ +	buf[0] = ACPI_PDC_REVISION_ID; +	buf[1] = 1; +	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); + +	set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + +	if ((HYPERVISOR_dom0_op(&op) == 0) && +	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { +		cpuid_leaf5_ecx_val = cx; +		cpuid_leaf5_edx_val = dx; +	} +	return true; +#else +	return false; +#endif +}  static void __init xen_init_cpuid_mask(void)  {  	unsigned int ax, bx, cx, dx; @@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void)  	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */  	if ((cx & xsave_mask) != xsave_mask)  		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ + +	if (xen_check_mwait()) +		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));  }  static void xen_set_debugreg(int reg, unsigned long val) @@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value);  static unsigned long xen_read_cr0(void)  { -	unsigned long cr0 = percpu_read(xen_cr0_value); +	unsigned long cr0 = this_cpu_read(xen_cr0_value);  	if (unlikely(cr0 == 0)) {  		cr0 = native_read_cr0(); -		percpu_write(xen_cr0_value, cr0); +		this_cpu_write(xen_cr0_value, cr0);  	}  	return cr0; @@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0)  {  	struct multicall_space mcs; -	percpu_write(xen_cr0_value, cr0); +	this_cpu_write(xen_cr0_value, cr0);  	/* Only pay attention to cr0.TS; everything else is  	   ignored. 
*/ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 8bbb465b6f0..15733765797 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void)  	struct vcpu_info *vcpu;  	unsigned long flags; -	vcpu = percpu_read(xen_vcpu); +	vcpu = this_cpu_read(xen_vcpu);  	/* flag has opposite sense of mask */  	flags = !vcpu->evtchn_upcall_mask; @@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags)  	   make sure we're don't switch CPUs between getting the vcpu  	   pointer and updating the mask. */  	preempt_disable(); -	vcpu = percpu_read(xen_vcpu); +	vcpu = this_cpu_read(xen_vcpu);  	vcpu->evtchn_upcall_mask = flags;  	preempt_enable_no_resched(); @@ -72,7 +72,7 @@ static void xen_irq_disable(void)  	   make sure we're don't switch CPUs between getting the vcpu  	   pointer and updating the mask. */  	preempt_disable(); -	percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; +	this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;  	preempt_enable_no_resched();  }  PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); @@ -86,7 +86,7 @@ static void xen_irq_enable(void)  	   the caller is confused and is trying to re-enable interrupts  	   on an indeterminate processor. */ -	vcpu = percpu_read(xen_vcpu); +	vcpu = this_cpu_read(xen_vcpu);  	vcpu->evtchn_upcall_mask = 0;  	/* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 95c1cf60c66..988828b479e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info)  	struct mm_struct *mm = info;  	struct mm_struct *active_mm; -	active_mm = percpu_read(cpu_tlbstate.active_mm); +	active_mm = this_cpu_read(cpu_tlbstate.active_mm); -	if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) +	if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)  		leave_mm(smp_processor_id());  	/* If this cpu still has a stale cr3 reference, then make sure  	   it has been flushed. */ -	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) +	if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))  		load_cr3(swapper_pg_dir);  } @@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base)  static void xen_write_cr2(unsigned long cr2)  { -	percpu_read(xen_vcpu)->arch.cr2 = cr2; +	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;  }  static unsigned long xen_read_cr2(void)  { -	return percpu_read(xen_vcpu)->arch.cr2; +	return this_cpu_read(xen_vcpu)->arch.cr2;  }  unsigned long xen_read_cr2_direct(void)  { -	return percpu_read(xen_vcpu_info.arch.cr2); +	return this_cpu_read(xen_vcpu_info.arch.cr2);  }  static void xen_flush_tlb(void) @@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,  static unsigned long xen_read_cr3(void)  { -	return percpu_read(xen_cr3); +	return this_cpu_read(xen_cr3);  }  static void set_current_cr3(void *v)  { -	percpu_write(xen_current_cr3, (unsigned long)v); +	this_cpu_write(xen_current_cr3, (unsigned long)v);  }  static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)  	xen_extend_mmuext_op(&op);  	if (kernel) { -		percpu_write(xen_cr3, cr3); +		this_cpu_write(xen_cr3, cr3);  		/* Update xen_current_cr3 once the batch has actually  		   been submitted. 
*/ @@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3)  	/* Update while interrupts are disabled, so its atomic with  	   respect to ipis */ -	percpu_write(xen_cr3, cr3); +	this_cpu_write(xen_cr3, cr3);  	__xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index dee79b78a90..9c2e74f9096 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode)  		xen_mc_flush();  	/* restore flags saved in xen_mc_batch */ -	local_irq_restore(percpu_read(xen_mc_irq_flags)); +	local_irq_restore(this_cpu_read(xen_mc_irq_flags));  }  /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e03c6369217..12366238d07 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -420,7 +420,6 @@ void __init xen_arch_setup(void)  	boot_cpu_data.hlt_works_ok = 1;  #endif  	disable_cpuidle(); -	boot_option_idle_override = IDLE_HALT;  	WARN_ON(set_pm_idle_to_default());  	fiddle_vdso();  } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 501d4e0244b..315d8fa0c8f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -76,7 +76,7 @@ static void __cpuinit cpu_bringup(void)  	xen_setup_cpu_clockevents();  	set_cpu_online(cpu, true); -	percpu_write(cpu_state, CPU_ONLINE); +	this_cpu_write(cpu_state, CPU_ONLINE);  	wmb();  	/* We can take interrupts now: we're officially "up". */ diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 7b46781c30c..8f675ae2091 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -558,7 +558,7 @@ config INPUT_CMA3000_I2C  config INPUT_XEN_KBDDEV_FRONTEND  	tristate "Xen virtual keyboard and mouse support" -	depends on XEN_FBDEV_FRONTEND +	depends on XEN  	default y  	select XEN_XENBUS_FRONTEND  	help diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af295bb21d6..053670e09e2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3163,6 +3163,31 @@ int __pci_reset_function(struct pci_dev *dev)  EXPORT_SYMBOL_GPL(__pci_reset_function);  /** + * __pci_reset_function_locked - reset a PCI device function while holding + * the @dev mutex lock. + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device.  The PCI device must be responsive + * to PCI config space in order to use this function. + * + * The device function is presumed to be unused and the caller is holding + * the device mutex lock when this function is called. + * Resetting the device will make the contents of PCI configuration space + * random, so any caller of this must be prepared to reinitialise the + * device including MSI, bus mastering, BARs, decoding IO and memory spaces, + * etc. + * + * Returns 0 if the device function was successfully reset or negative if the + * device doesn't support resetting a single function. 
+ */ +int __pci_reset_function_locked(struct pci_dev *dev) +{ +	return pci_dev_reset(dev, 1); +} +EXPORT_SYMBOL_GPL(__pci_reset_function_locked); + +/**   * pci_probe_reset_function - check whether the device can be safely reset   * @dev: PCI device to reset   * diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 48cb8d3d175..0282a83f51f 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -66,6 +66,14 @@ config HVC_XEN  	help  	  Xen virtual console device driver +config HVC_XEN_FRONTEND +	bool "Xen Hypervisor Multiple Consoles support" +	depends on HVC_XEN +	select XEN_XENBUS_FRONTEND +	default y +	help +	  Xen driver for secondary virtual consoles +  config HVC_UDBG         bool "udbg based fake hypervisor console"         depends on PPC && EXPERIMENTAL diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index a1b0a75c3ea..83d5c88e716 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -23,44 +23,74 @@  #include <linux/err.h>  #include <linux/init.h>  #include <linux/types.h> +#include <linux/list.h> +#include <asm/io.h>  #include <asm/xen/hypervisor.h>  #include <xen/xen.h> +#include <xen/interface/xen.h> +#include <xen/hvm.h> +#include <xen/grant_table.h>  #include <xen/page.h>  #include <xen/events.h>  #include <xen/interface/io/console.h>  #include <xen/hvc-console.h> +#include <xen/xenbus.h>  #include "hvc_console.h"  #define HVC_COOKIE   0x58656e /* "Xen" in hex */ -static struct hvc_struct *hvc; -static int xencons_irq; +struct xencons_info { +	struct list_head list; +	struct xenbus_device *xbdev; +	struct xencons_interface *intf; +	unsigned int evtchn; +	struct hvc_struct *hvc; +	int irq; +	int vtermno; +	grant_ref_t gntref; +}; + +static LIST_HEAD(xenconsoles); +static DEFINE_SPINLOCK(xencons_lock);  /* ------------------------------------------------------------------ */ -static unsigned long console_pfn = ~0ul; +static struct xencons_info *vtermno_to_xencons(int vtermno) +{ +	struct xencons_info *entry, *n, *ret = NULL; -static inline struct xencons_interface *xencons_interface(void) +	if (list_empty(&xenconsoles)) +			return NULL; + +	list_for_each_entry_safe(entry, n, &xenconsoles, list) { +		if (entry->vtermno == vtermno) { +			ret  = entry; +			break; +		} +	} + +	return ret; +} + +static inline int xenbus_devid_to_vtermno(int devid)  { -	if (console_pfn == ~0ul) -		return mfn_to_virt(xen_start_info->console.domU.mfn); -	else -		return __va(console_pfn << PAGE_SHIFT); +	return devid + HVC_COOKIE;  } -static inline void notify_daemon(void) +static inline void notify_daemon(struct xencons_info *cons)  {  	/* Use evtchn: this is called early, before irq is set up. 
*/ -	notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); +	notify_remote_via_evtchn(cons->evtchn);  } -static int __write_console(const char *data, int len) +static int __write_console(struct xencons_info *xencons, +		const char *data, int len)  { -	struct xencons_interface *intf = xencons_interface();  	XENCONS_RING_IDX cons, prod; +	struct xencons_interface *intf = xencons->intf;  	int sent = 0;  	cons = intf->out_cons; @@ -75,13 +105,16 @@ static int __write_console(const char *data, int len)  	intf->out_prod = prod;  	if (sent) -		notify_daemon(); +		notify_daemon(xencons);  	return sent;  }  static int domU_write_console(uint32_t vtermno, const char *data, int len)  {  	int ret = len; +	struct xencons_info *cons = vtermno_to_xencons(vtermno); +	if (cons == NULL) +		return -EINVAL;  	/*  	 * Make sure the whole buffer is emitted, polling if @@ -90,7 +123,7 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len)  	 * kernel is crippled.  	 */  	while (len) { -		int sent = __write_console(data, len); +		int sent = __write_console(cons, data, len);  		data += sent;  		len -= sent; @@ -104,9 +137,13 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len)  static int domU_read_console(uint32_t vtermno, char *buf, int len)  { -	struct xencons_interface *intf = xencons_interface(); +	struct xencons_interface *intf;  	XENCONS_RING_IDX cons, prod;  	int recv = 0; +	struct xencons_info *xencons = vtermno_to_xencons(vtermno); +	if (xencons == NULL) +		return -EINVAL; +	intf = xencons->intf;  	cons = intf->in_cons;  	prod = intf->in_prod; @@ -119,7 +156,7 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)  	mb();			/* read ring before consuming */  	intf->in_cons = cons; -	notify_daemon(); +	notify_daemon(xencons);  	return recv;  } @@ -157,68 +194,407 @@ static struct hv_ops dom0_hvc_ops = {  	.notifier_hangup = notifier_hangup_irq,  }; -static int __init xen_hvc_init(void) +static int xen_hvm_console_init(void)  { -	struct hvc_struct *hp; -	struct hv_ops *ops; +	int r; +	uint64_t v = 0; +	unsigned long mfn; +	struct xencons_info *info; + +	if (!xen_hvm_domain()) +		return -ENODEV; + +	info = vtermno_to_xencons(HVC_COOKIE); +	if (!info) { +		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); +		if (!info) +			return -ENOMEM; +	} + +	/* already configured */ +	if (info->intf != NULL) +		return 0; + +	r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); +	if (r < 0) { +		kfree(info); +		return -ENODEV; +	} +	info->evtchn = v; +	hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); +	if (r < 0) { +		kfree(info); +		return -ENODEV; +	} +	mfn = v; +	info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); +	if (info->intf == NULL) { +		kfree(info); +		return -ENODEV; +	} +	info->vtermno = HVC_COOKIE; + +	spin_lock(&xencons_lock); +	list_add_tail(&info->list, &xenconsoles); +	spin_unlock(&xencons_lock); + +	return 0; +} + +static int xen_pv_console_init(void) +{ +	struct xencons_info *info;  	if (!xen_pv_domain())  		return -ENODEV; -	if (xen_initial_domain()) { -		ops = &dom0_hvc_ops; -		xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); -	} else { -		if (!xen_start_info->console.domU.evtchn) -			return -ENODEV; +	if (!xen_start_info->console.domU.evtchn) +		return -ENODEV; -		ops = &domU_hvc_ops; -		xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); +	info = vtermno_to_xencons(HVC_COOKIE); +	if (!info) { +		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); +		if (!info) +			return 
-ENOMEM;  	} -	if (xencons_irq < 0) -		xencons_irq = 0; -	else -		irq_set_noprobe(xencons_irq); -	hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256); -	if (IS_ERR(hp)) -		return PTR_ERR(hp); +	/* already configured */ +	if (info->intf != NULL) +		return 0; + +	info->evtchn = xen_start_info->console.domU.evtchn; +	info->intf = mfn_to_virt(xen_start_info->console.domU.mfn); +	info->vtermno = HVC_COOKIE; + +	spin_lock(&xencons_lock); +	list_add_tail(&info->list, &xenconsoles); +	spin_unlock(&xencons_lock); + +	return 0; +} + +static int xen_initial_domain_console_init(void) +{ +	struct xencons_info *info; + +	if (!xen_initial_domain()) +		return -ENODEV; + +	info = vtermno_to_xencons(HVC_COOKIE); +	if (!info) { +		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); +		if (!info) +			return -ENOMEM; +	} -	hvc = hp; +	info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); +	info->vtermno = HVC_COOKIE; -	console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); +	spin_lock(&xencons_lock); +	list_add_tail(&info->list, &xenconsoles); +	spin_unlock(&xencons_lock);  	return 0;  }  void xen_console_resume(void)  { -	if (xencons_irq) -		rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); +	struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE); +	if (info != NULL && info->irq) +		rebind_evtchn_irq(info->evtchn, info->irq); +} + +static void xencons_disconnect_backend(struct xencons_info *info) +{ +	if (info->irq > 0) +		unbind_from_irqhandler(info->irq, NULL); +	info->irq = 0; +	if (info->evtchn > 0) +		xenbus_free_evtchn(info->xbdev, info->evtchn); +	info->evtchn = 0; +	if (info->gntref > 0) +		gnttab_free_grant_references(info->gntref); +	info->gntref = 0; +	if (info->hvc != NULL) +		hvc_remove(info->hvc); +	info->hvc = NULL; +} + +static void xencons_free(struct xencons_info *info) +{ +	free_page((unsigned long)info->intf); +	info->intf = NULL; +	info->vtermno = 0; +	kfree(info); +} + +static int xen_console_remove(struct xencons_info *info) +{ +	xencons_disconnect_backend(info); +	spin_lock(&xencons_lock); +	list_del(&info->list); +	spin_unlock(&xencons_lock); +	if (info->xbdev != NULL) +		xencons_free(info); +	else { +		if (xen_hvm_domain()) +			iounmap(info->intf); +		kfree(info); +	} +	return 0; +} + +#ifdef CONFIG_HVC_XEN_FRONTEND +static struct xenbus_driver xencons_driver; + +static int xencons_remove(struct xenbus_device *dev) +{ +	return xen_console_remove(dev_get_drvdata(&dev->dev)); +} + +static int xencons_connect_backend(struct xenbus_device *dev, +				  struct xencons_info *info) +{ +	int ret, evtchn, devid, ref, irq; +	struct xenbus_transaction xbt; +	grant_ref_t gref_head; +	unsigned long mfn; + +	ret = xenbus_alloc_evtchn(dev, &evtchn); +	if (ret) +		return ret; +	info->evtchn = evtchn; +	irq = bind_evtchn_to_irq(evtchn); +	if (irq < 0) +		return irq; +	info->irq = irq; +	devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; +	info->hvc = hvc_alloc(xenbus_devid_to_vtermno(devid), +			irq, &domU_hvc_ops, 256); +	if (IS_ERR(info->hvc)) +		return PTR_ERR(info->hvc); +	if (xen_pv_domain()) +		mfn = virt_to_mfn(info->intf); +	else +		mfn = __pa(info->intf) >> PAGE_SHIFT; +	ret = gnttab_alloc_grant_references(1, &gref_head); +	if (ret < 0) +		return ret; +	info->gntref = gref_head; +	ref = gnttab_claim_grant_reference(&gref_head); +	if (ref < 0) +		return ref; +	gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id, +			mfn, 0); + + again: +	ret = xenbus_transaction_start(&xbt); +	if (ret) { +		xenbus_dev_fatal(dev, ret, "starting transaction"); 
+		return ret; +	} +	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref); +	if (ret) +		goto error_xenbus; +	ret = xenbus_printf(xbt, dev->nodename, "port", "%u", +			    evtchn); +	if (ret) +		goto error_xenbus; +	ret = xenbus_printf(xbt, dev->nodename, "type", "ioemu"); +	if (ret) +		goto error_xenbus; +	ret = xenbus_transaction_end(xbt, 0); +	if (ret) { +		if (ret == -EAGAIN) +			goto again; +		xenbus_dev_fatal(dev, ret, "completing transaction"); +		return ret; +	} + +	xenbus_switch_state(dev, XenbusStateInitialised); +	return 0; + + error_xenbus: +	xenbus_transaction_end(xbt, 1); +	xenbus_dev_fatal(dev, ret, "writing xenstore"); +	return ret; +} + +static int __devinit xencons_probe(struct xenbus_device *dev, +				  const struct xenbus_device_id *id) +{ +	int ret, devid; +	struct xencons_info *info; + +	devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; +	if (devid == 0) +		return -ENODEV; + +	info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); +	if (!info) +		goto error_nomem; +	dev_set_drvdata(&dev->dev, info); +	info->xbdev = dev; +	info->vtermno = xenbus_devid_to_vtermno(devid); +	info->intf = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); +	if (!info->intf) +		goto error_nomem; + +	ret = xencons_connect_backend(dev, info); +	if (ret < 0) +		goto error; +	spin_lock(&xencons_lock); +	list_add_tail(&info->list, &xenconsoles); +	spin_unlock(&xencons_lock); + +	return 0; + + error_nomem: +	ret = -ENOMEM; +	xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: +	xencons_disconnect_backend(info); +	xencons_free(info); +	return ret; +} + +static int xencons_resume(struct xenbus_device *dev) +{ +	struct xencons_info *info = dev_get_drvdata(&dev->dev); + +	xencons_disconnect_backend(info); +	memset(info->intf, 0, PAGE_SIZE); +	return xencons_connect_backend(dev, info); +} + +static void xencons_backend_changed(struct xenbus_device *dev, +				   enum xenbus_state backend_state) +{ +	switch (backend_state) { +	case XenbusStateReconfiguring: +	case XenbusStateReconfigured: +	case XenbusStateInitialising: +	case XenbusStateInitialised: +	case XenbusStateUnknown: +	case XenbusStateClosed: +		break; + +	case XenbusStateInitWait: +		break; + +	case XenbusStateConnected: +		xenbus_switch_state(dev, XenbusStateConnected); +		break; + +	case XenbusStateClosing: +		xenbus_frontend_closed(dev); +		break; +	} +} + +static const struct xenbus_device_id xencons_ids[] = { +	{ "console" }, +	{ "" } +}; + + +static DEFINE_XENBUS_DRIVER(xencons, "xenconsole", +	.probe = xencons_probe, +	.remove = xencons_remove, +	.resume = xencons_resume, +	.otherend_changed = xencons_backend_changed, +); +#endif /* CONFIG_HVC_XEN_FRONTEND */ + +static int __init xen_hvc_init(void) +{ +	int r; +	struct xencons_info *info; +	const struct hv_ops *ops; + +	if (!xen_domain()) +		return -ENODEV; + +	if (xen_initial_domain()) { +		ops = &dom0_hvc_ops; +		r = xen_initial_domain_console_init(); +		if (r < 0) +			return r; +		info = vtermno_to_xencons(HVC_COOKIE); +	} else { +		ops = &domU_hvc_ops; +		if (xen_hvm_domain()) +			r = xen_hvm_console_init(); +		else +			r = xen_pv_console_init(); +		if (r < 0) +			return r; + +		info = vtermno_to_xencons(HVC_COOKIE); +		info->irq = bind_evtchn_to_irq(info->evtchn); +	} +	if (info->irq < 0) +		info->irq = 0; /* NO_IRQ */ +	else +		irq_set_noprobe(info->irq); + +	info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); +	if (IS_ERR(info->hvc)) { +		r = PTR_ERR(info->hvc); +		spin_lock(&xencons_lock); +		list_del(&info->list); +		
spin_unlock(&xencons_lock); +		if (info->irq) +			unbind_from_irqhandler(info->irq, NULL); +		kfree(info); +		return r; +	} + +	r = 0; +#ifdef CONFIG_HVC_XEN_FRONTEND +	r = xenbus_register_frontend(&xencons_driver); +#endif +	return r;  }  static void __exit xen_hvc_fini(void)  { -	if (hvc) -		hvc_remove(hvc); +	struct xencons_info *entry, *next; + +	if (list_empty(&xenconsoles)) +			return; + +	list_for_each_entry_safe(entry, next, &xenconsoles, list) { +		xen_console_remove(entry); +	}  }  static int xen_cons_init(void)  { -	struct hv_ops *ops; +	const struct hv_ops *ops; -	if (!xen_pv_domain()) +	if (!xen_domain())  		return 0;  	if (xen_initial_domain())  		ops = &dom0_hvc_ops; -	else +	else { +		int r;  		ops = &domU_hvc_ops; +		if (xen_hvm_domain()) +			r = xen_hvm_console_init(); +		else +			r = xen_pv_console_init(); +		if (r < 0) +			return r; +	} +  	hvc_instantiate(HVC_COOKIE, 0, ops);  	return 0;  } +  module_init(xen_hvc_init);  module_exit(xen_hvc_fini);  console_initcall(xen_cons_init); @@ -230,6 +606,9 @@ static void xenboot_write_console(struct console *console, const char *string,  	unsigned int linelen, off = 0;  	const char *pos; +	if (!xen_pv_domain()) +		return; +  	dom0_write_console(0, string, len);  	if (xen_initial_domain()) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 6ca0c407c14..eca60c73ef1 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -2269,6 +2269,7 @@ config XEN_FBDEV_FRONTEND  	select FB_SYS_IMAGEBLIT  	select FB_SYS_FOPS  	select FB_DEFERRED_IO +	select INPUT_XEN_KBDDEV_FRONTEND  	select XEN_XENBUS_FRONTEND  	default y  	help diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a1ced521cf7..648bcd4195c 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -178,4 +178,21 @@ config XEN_PRIVCMD  	depends on XEN  	default m +config XEN_ACPI_PROCESSOR +	tristate "Xen ACPI processor" +	depends on XEN && X86 && ACPI_PROCESSOR +	default y if (X86_ACPI_CPUFREQ = y || X86_POWERNOW_K8 = y) +	default m if (X86_ACPI_CPUFREQ = m || X86_POWERNOW_K8 = m) +	help +          This ACPI processor uploads Power Management information to the Xen hypervisor. + +	  To do that the driver parses the Power Management data and uploads said +	  information to the Xen hypervisor. Then the Xen hypervisor can select the +          proper Cx and Pxx states. It also registers itslef as the SMM so that +          other drivers (such as ACPI cpufreq scaling driver) will not load. + +          To compile this driver as a module, choose M here: the +          module will be called xen_acpi_processor  If you do not know what to choose, +          select M here. If the CPUFREQ drivers are built in, select Y here. 
+  endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa31337192c..9adc5be57b1 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o  obj-$(CONFIG_XEN_DOM0)			+= pci.o  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/  obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o - +obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o  xen-evtchn-y				:= evtchn.o  xen-gntdev-y				:= gntdev.o  xen-gntalloc-y				:= gntalloc.o diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 1e0fe01eb67..fdb6d229c9b 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -97,7 +97,7 @@ static struct attribute *version_attrs[] = {  	NULL  }; -static struct attribute_group version_group = { +static const struct attribute_group version_group = {  	.name = "version",  	.attrs = version_attrs,  }; @@ -210,7 +210,7 @@ static struct attribute *xen_compile_attrs[] = {  	NULL  }; -static struct attribute_group xen_compilation_group = { +static const struct attribute_group xen_compilation_group = {  	.name = "compilation",  	.attrs = xen_compile_attrs,  }; @@ -340,7 +340,7 @@ static struct attribute *xen_properties_attrs[] = {  	NULL  }; -static struct attribute_group xen_properties_group = { +static const struct attribute_group xen_properties_group = {  	.name = "properties",  	.attrs = xen_properties_attrs,  }; diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c new file mode 100644 index 00000000000..5c2be963aa1 --- /dev/null +++ b/drivers/xen/xen-acpi-processor.c @@ -0,0 +1,562 @@ +/* + * Copyright 2012 by Oracle Inc + * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> + * + * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249 + * so many thanks go to Kevin Tian <kevin.tian@intel.com> + * and Yu Ke <ke.yu@intel.com>. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + */ + +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/freezer.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> +#include <acpi/processor.h> + +#include <xen/interface/platform.h> +#include <asm/xen/hypercall.h> + +#define DRV_NAME "xen-acpi-processor: " + +static int no_hypercall; +MODULE_PARM_DESC(off, "Inhibit the hypercall."); +module_param_named(off, no_hypercall, int, 0400); + +/* + * Note: Do not convert the acpi_id* below to cpumask_var_t or use cpumask_bit + * - as those shrink to nr_cpu_bits (which is dependent on possible_cpu), which + * can be less than what we want to put in. Instead use the 'nr_acpi_bits' + * which is dynamically computed based on the MADT or x2APIC table. + */ +static unsigned int nr_acpi_bits; +/* Mutex to protect the acpi_ids_done - for CPU hotplug use. */ +static DEFINE_MUTEX(acpi_ids_mutex); +/* Which ACPI ID we have processed from 'struct acpi_processor'. */ +static unsigned long *acpi_ids_done; +/* Which ACPI ID exist in the SSDT/DSDT processor definitions. 
*/ +static unsigned long __initdata *acpi_id_present; +/* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ +static unsigned long __initdata *acpi_id_cst_present; + +static int push_cxx_to_hypervisor(struct acpi_processor *_pr) +{ +	struct xen_platform_op op = { +		.cmd			= XENPF_set_processor_pminfo, +		.interface_version	= XENPF_INTERFACE_VERSION, +		.u.set_pminfo.id	= _pr->acpi_id, +		.u.set_pminfo.type	= XEN_PM_CX, +	}; +	struct xen_processor_cx *dst_cx, *dst_cx_states = NULL; +	struct acpi_processor_cx *cx; +	unsigned int i, ok; +	int ret = 0; + +	dst_cx_states = kcalloc(_pr->power.count, +				sizeof(struct xen_processor_cx), GFP_KERNEL); +	if (!dst_cx_states) +		return -ENOMEM; + +	for (ok = 0, i = 1; i <= _pr->power.count; i++) { +		cx = &_pr->power.states[i]; +		if (!cx->valid) +			continue; + +		dst_cx = &(dst_cx_states[ok++]); + +		dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO; +		if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { +			dst_cx->reg.bit_width = 8; +			dst_cx->reg.bit_offset = 0; +			dst_cx->reg.access_size = 1; +		} else { +			dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE; +			if (cx->entry_method == ACPI_CSTATE_FFH) { +				/* NATIVE_CSTATE_BEYOND_HALT */ +				dst_cx->reg.bit_offset = 2; +				dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */ +			} +			dst_cx->reg.access_size = 0; +		} +		dst_cx->reg.address = cx->address; + +		dst_cx->type = cx->type; +		dst_cx->latency = cx->latency; +		dst_cx->power = cx->power; + +		dst_cx->dpcnt = 0; +		set_xen_guest_handle(dst_cx->dp, NULL); +	} +	if (!ok) { +		pr_debug(DRV_NAME "No _Cx for ACPI CPU %u\n", _pr->acpi_id); +		kfree(dst_cx_states); +		return -EINVAL; +	} +	op.u.set_pminfo.power.count = ok; +	op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control; +	op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check; +	op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst; +	op.u.set_pminfo.power.flags.power_setup_done = +		_pr->flags.power_setup_done; + +	set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states); + +	if (!no_hypercall) +		ret = HYPERVISOR_dom0_op(&op); + +	if (!ret) { +		pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id); +		for (i = 1; i <= _pr->power.count; i++) { +			cx = &_pr->power.states[i]; +			if (!cx->valid) +				continue; +			pr_debug("     C%d: %s %d uS\n", +				 cx->type, cx->desc, (u32)cx->latency); +		} +	} else +		pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI CPU%u\n", +		       ret, _pr->acpi_id); + +	kfree(dst_cx_states); + +	return ret; +} +static struct xen_processor_px * +xen_copy_pss_data(struct acpi_processor *_pr, +		  struct xen_processor_performance *dst_perf) +{ +	struct xen_processor_px *dst_states = NULL; +	unsigned int i; + +	BUILD_BUG_ON(sizeof(struct xen_processor_px) != +		     sizeof(struct acpi_processor_px)); + +	dst_states = kcalloc(_pr->performance->state_count, +			     sizeof(struct xen_processor_px), GFP_KERNEL); +	if (!dst_states) +		return ERR_PTR(-ENOMEM); + +	dst_perf->state_count = _pr->performance->state_count; +	for (i = 0; i < _pr->performance->state_count; i++) { +		/* Fortunatly for us, they are both the same size */ +		memcpy(&(dst_states[i]), &(_pr->performance->states[i]), +		       sizeof(struct acpi_processor_px)); +	} +	return dst_states; +} +static int xen_copy_psd_data(struct acpi_processor *_pr, +			     struct xen_processor_performance *dst) +{ +	struct acpi_psd_package *pdomain; + +	BUILD_BUG_ON(sizeof(struct xen_psd_package) != +		     sizeof(struct acpi_psd_package)); + +	/* This 
information is enumerated only if acpi_processor_preregister_performance +	 * has been called. +	 */ +	dst->shared_type = _pr->performance->shared_type; + +	pdomain = &(_pr->performance->domain_info); + +	/* 'acpi_processor_preregister_performance' does not parse if the +	 * num_processors <= 1, but Xen still requires it. Do it manually here. +	 */ +	if (pdomain->num_processors <= 1) { +		if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) +			dst->shared_type = CPUFREQ_SHARED_TYPE_ALL; +		else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) +			dst->shared_type = CPUFREQ_SHARED_TYPE_HW; +		else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) +			dst->shared_type = CPUFREQ_SHARED_TYPE_ANY; + +	} +	memcpy(&(dst->domain_info), pdomain, sizeof(struct acpi_psd_package)); +	return 0; +} +static int xen_copy_pct_data(struct acpi_pct_register *pct, +			     struct xen_pct_register *dst_pct) +{ +	/* It would be nice if you could just do 'memcpy(pct, dst_pct') but +	 * sadly the Xen structure did not have the proper padding so the +	 * descriptor field takes two (dst_pct) bytes instead of one (pct). +	 */ +	dst_pct->descriptor = pct->descriptor; +	dst_pct->length = pct->length; +	dst_pct->space_id = pct->space_id; +	dst_pct->bit_width = pct->bit_width; +	dst_pct->bit_offset = pct->bit_offset; +	dst_pct->reserved = pct->reserved; +	dst_pct->address = pct->address; +	return 0; +} +static int push_pxx_to_hypervisor(struct acpi_processor *_pr) +{ +	int ret = 0; +	struct xen_platform_op op = { +		.cmd			= XENPF_set_processor_pminfo, +		.interface_version	= XENPF_INTERFACE_VERSION, +		.u.set_pminfo.id	= _pr->acpi_id, +		.u.set_pminfo.type	= XEN_PM_PX, +	}; +	struct xen_processor_performance *dst_perf; +	struct xen_processor_px *dst_states = NULL; + +	dst_perf = &op.u.set_pminfo.perf; + +	dst_perf->platform_limit = _pr->performance_platform_limit; +	dst_perf->flags |= XEN_PX_PPC; +	xen_copy_pct_data(&(_pr->performance->control_register), +			  &dst_perf->control_register); +	xen_copy_pct_data(&(_pr->performance->status_register), +			  &dst_perf->status_register); +	dst_perf->flags |= XEN_PX_PCT; +	dst_states = xen_copy_pss_data(_pr, dst_perf); +	if (!IS_ERR_OR_NULL(dst_states)) { +		set_xen_guest_handle(dst_perf->states, dst_states); +		dst_perf->flags |= XEN_PX_PSS; +	} +	if (!xen_copy_psd_data(_pr, dst_perf)) +		dst_perf->flags |= XEN_PX_PSD; + +	if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) { +		pr_warn(DRV_NAME "ACPI CPU%u missing some P-state data (%x), skipping.\n", +			_pr->acpi_id, dst_perf->flags); +		ret = -ENODEV; +		goto err_free; +	} + +	if (!no_hypercall) +		ret = HYPERVISOR_dom0_op(&op); + +	if (!ret) { +		struct acpi_processor_performance *perf; +		unsigned int i; + +		perf = _pr->performance; +		pr_debug("ACPI CPU%u - P-states uploaded.\n", _pr->acpi_id); +		for (i = 0; i < perf->state_count; i++) { +			pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n", +			(i == perf->state ? '*' : ' '), i, +			(u32) perf->states[i].core_frequency, +			(u32) perf->states[i].power, +			(u32) perf->states[i].transition_latency); +		} +	} else if (ret != -EINVAL) +		/* EINVAL means the ACPI ID is incorrect - meaning the ACPI +		 * table is referencing a non-existing CPU - which can happen +		 * with broken ACPI tables. 
*/ +		pr_warn(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI CPU%u\n", +		       ret, _pr->acpi_id); +err_free: +	if (!IS_ERR_OR_NULL(dst_states)) +		kfree(dst_states); + +	return ret; +} +static int upload_pm_data(struct acpi_processor *_pr) +{ +	int err = 0; + +	mutex_lock(&acpi_ids_mutex); +	if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done)) { +		mutex_unlock(&acpi_ids_mutex); +		return -EBUSY; +	} +	if (_pr->flags.power) +		err = push_cxx_to_hypervisor(_pr); + +	if (_pr->performance && _pr->performance->states) +		err |= push_pxx_to_hypervisor(_pr); + +	mutex_unlock(&acpi_ids_mutex); +	return err; +} +static unsigned int __init get_max_acpi_id(void) +{ +	struct xenpf_pcpuinfo *info; +	struct xen_platform_op op = { +		.cmd = XENPF_get_cpuinfo, +		.interface_version = XENPF_INTERFACE_VERSION, +	}; +	int ret = 0; +	unsigned int i, last_cpu, max_acpi_id = 0; + +	info = &op.u.pcpu_info; +	info->xen_cpuid = 0; + +	ret = HYPERVISOR_dom0_op(&op); +	if (ret) +		return NR_CPUS; + +	/* The max_present is the same irregardless of the xen_cpuid */ +	last_cpu = op.u.pcpu_info.max_present; +	for (i = 0; i <= last_cpu; i++) { +		info->xen_cpuid = i; +		ret = HYPERVISOR_dom0_op(&op); +		if (ret) +			continue; +		max_acpi_id = max(info->acpi_id, max_acpi_id); +	} +	max_acpi_id *= 2; /* Slack for CPU hotplug support. */ +	pr_debug(DRV_NAME "Max ACPI ID: %u\n", max_acpi_id); +	return max_acpi_id; +} +/* + * The read_acpi_id and check_acpi_ids are there to support the Xen + * oddity of virtual CPUs != physical CPUs in the initial domain. + * The user can supply 'xen_max_vcpus=X' on the Xen hypervisor line + * which will band the amount of CPUs the initial domain can see. + * In general that is OK, except it plays havoc with any of the + * for_each_[present|online]_cpu macros which are banded to the virtual + * CPU amount. + */ +static acpi_status __init +read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) +{ +	u32 acpi_id; +	acpi_status status; +	acpi_object_type acpi_type; +	unsigned long long tmp; +	union acpi_object object = { 0 }; +	struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; +	acpi_io_address pblk = 0; + +	status = acpi_get_type(handle, &acpi_type); +	if (ACPI_FAILURE(status)) +		return AE_OK; + +	switch (acpi_type) { +	case ACPI_TYPE_PROCESSOR: +		status = acpi_evaluate_object(handle, NULL, NULL, &buffer); +		if (ACPI_FAILURE(status)) +			return AE_OK; +		acpi_id = object.processor.proc_id; +		pblk = object.processor.pblk_address; +		break; +	case ACPI_TYPE_DEVICE: +		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); +		if (ACPI_FAILURE(status)) +			return AE_OK; +		acpi_id = tmp; +		break; +	default: +		return AE_OK; +	} +	/* There are more ACPI Processor objects than in x2APIC or MADT. +	 * This can happen with incorrect ACPI SSDT declerations. */ +	if (acpi_id > nr_acpi_bits) { +		pr_debug(DRV_NAME "We only have %u, trying to set %u\n", +			 nr_acpi_bits, acpi_id); +		return AE_OK; +	} +	/* OK, There is a ACPI Processor object */ +	__set_bit(acpi_id, acpi_id_present); + +	pr_debug(DRV_NAME "ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, +		 (unsigned long)pblk); + +	status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); +	if (ACPI_FAILURE(status)) { +		if (!pblk) +			return AE_OK; +	} +	/* .. and it has a C-state */ +	__set_bit(acpi_id, acpi_id_cst_present); + +	return AE_OK; +} +static int __init check_acpi_ids(struct acpi_processor *pr_backup) +{ + +	if (!pr_backup) +		return -ENODEV; + +	/* All online CPUs have been processed at this stage. 
Now verify +	 * whether in fact "online CPUs" == physical CPUs. +	 */ +	acpi_id_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); +	if (!acpi_id_present) +		return -ENOMEM; + +	acpi_id_cst_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); +	if (!acpi_id_cst_present) { +		kfree(acpi_id_present); +		return -ENOMEM; +	} + +	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, +			    ACPI_UINT32_MAX, +			    read_acpi_id, NULL, NULL, NULL); +	acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL); + +	if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) { +		unsigned int i; +		for_each_set_bit(i, acpi_id_present, nr_acpi_bits) { +			pr_backup->acpi_id = i; +			/* Mask out C-states if there are no _CST or PBLK */ +			pr_backup->flags.power = test_bit(i, acpi_id_cst_present); +			(void)upload_pm_data(pr_backup); +		} +	} +	kfree(acpi_id_present); +	acpi_id_present = NULL; +	kfree(acpi_id_cst_present); +	acpi_id_cst_present = NULL; +	return 0; +} +static int __init check_prereq(void) +{ +	struct cpuinfo_x86 *c = &cpu_data(0); + +	if (!xen_initial_domain()) +		return -ENODEV; + +	if (!acpi_gbl_FADT.smi_command) +		return -ENODEV; + +	if (c->x86_vendor == X86_VENDOR_INTEL) { +		if (!cpu_has(c, X86_FEATURE_EST)) +			return -ENODEV; + +		return 0; +	} +	if (c->x86_vendor == X86_VENDOR_AMD) { +		/* Copied from powernow-k8.h, can't include ../cpufreq/powernow +		 * as we get compile warnings for the static functions. +		 */ +#define CPUID_FREQ_VOLT_CAPABILITIES    0x80000007 +#define USE_HW_PSTATE                   0x00000080 +		u32 eax, ebx, ecx, edx; +		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); +		if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) +			return -ENODEV; +		return 0; +	} +	return -ENODEV; +} +/* acpi_perf_data is a pointer to percpu data. */ +static struct acpi_processor_performance __percpu *acpi_perf_data; + +static void free_acpi_perf_data(void) +{ +	unsigned int i; + +	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ +	for_each_possible_cpu(i) +		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) +				 ->shared_cpu_map); +	free_percpu(acpi_perf_data); +} + +static int __init xen_acpi_processor_init(void) +{ +	struct acpi_processor *pr_backup = NULL; +	unsigned int i; +	int rc = check_prereq(); + +	if (rc) +		return rc; + +	nr_acpi_bits = get_max_acpi_id() + 1; +	acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); +	if (!acpi_ids_done) +		return -ENOMEM; + +	acpi_perf_data = alloc_percpu(struct acpi_processor_performance); +	if (!acpi_perf_data) { +		pr_debug(DRV_NAME "Memory allocation error for acpi_perf_data.\n"); +		kfree(acpi_ids_done); +		return -ENOMEM; +	} +	for_each_possible_cpu(i) { +		if (!zalloc_cpumask_var_node( +			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, +			GFP_KERNEL, cpu_to_node(i))) { +			rc = -ENOMEM; +			goto err_out; +		} +	} + +	/* Do initialization in ACPI core. It is OK to fail here. 
*/ +	(void)acpi_processor_preregister_performance(acpi_perf_data); + +	for_each_possible_cpu(i) { +		struct acpi_processor_performance *perf; + +		perf = per_cpu_ptr(acpi_perf_data, i); +		rc = acpi_processor_register_performance(perf, i); +		if (WARN_ON(rc)) +			goto err_out; +	} +	rc = acpi_processor_notify_smm(THIS_MODULE); +	if (WARN_ON(rc)) +		goto err_unregister; + +	for_each_possible_cpu(i) { +		struct acpi_processor *_pr; +		_pr = per_cpu(processors, i /* APIC ID */); +		if (!_pr) +			continue; + +		if (!pr_backup) { +			pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); +			memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); +		} +		(void)upload_pm_data(_pr); +	} +	rc = check_acpi_ids(pr_backup); +	if (rc) +		goto err_unregister; + +	kfree(pr_backup); + +	return 0; +err_unregister: +	for_each_possible_cpu(i) { +		struct acpi_processor_performance *perf; +		perf = per_cpu_ptr(acpi_perf_data, i); +		acpi_processor_unregister_performance(perf, i); +	} +err_out: +	/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ +	free_acpi_perf_data(); +	kfree(acpi_ids_done); +	return rc; +} +static void __exit xen_acpi_processor_exit(void) +{ +	int i; + +	kfree(acpi_ids_done); +	for_each_possible_cpu(i) { +		struct acpi_processor_performance *perf; +		perf = per_cpu_ptr(acpi_perf_data, i); +		acpi_processor_unregister_performance(perf, i); +	} +	free_acpi_perf_data(); +} + +MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>"); +MODULE_DESCRIPTION("Xen ACPI Processor P-states (and Cx) driver which uploads PM data to Xen hypervisor"); +MODULE_LICENSE("GPL"); + +/* We want to be loaded before the CPU freq scaling drivers are loaded. + * They are loaded in late_initcall. */ +device_initcall(xen_acpi_processor_init); +module_exit(xen_acpi_processor_exit); diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 596e6a7b17d..8f37e23f6d1 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -207,7 +207,7 @@ static struct attribute *balloon_info_attrs[] = {  	NULL  }; -static struct attribute_group balloon_info_group = { +static const struct attribute_group balloon_info_group = {  	.name = "info",  	.attrs = balloon_info_attrs  }; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 19834d1c7c3..097e536e867 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -85,19 +85,34 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)  static void pcistub_device_release(struct kref *kref)  {  	struct pcistub_device *psdev; +	struct xen_pcibk_dev_data *dev_data;  	psdev = container_of(kref, struct pcistub_device, kref); +	dev_data = pci_get_drvdata(psdev->dev);  	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");  	xen_unregister_device_domain_owner(psdev->dev); -	/* Clean-up the device */ +	/* Call the reset function which does not take lock as this +	 * is called from "unbind" which takes a device_lock mutex. 
+	 */ +	__pci_reset_function_locked(psdev->dev); +	if (pci_load_and_free_saved_state(psdev->dev, +					  &dev_data->pci_saved_state)) { +		dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n"); +	} else +		pci_restore_state(psdev->dev); + +	/* Disable the device */  	xen_pcibk_reset_device(psdev->dev); + +	kfree(dev_data); +	pci_set_drvdata(psdev->dev, NULL); + +	/* Clean-up the device */  	xen_pcibk_config_free_dyn_fields(psdev->dev);  	xen_pcibk_config_free_dev(psdev->dev); -	kfree(pci_get_drvdata(psdev->dev)); -	pci_set_drvdata(psdev->dev, NULL);  	psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;  	pci_dev_put(psdev->dev); @@ -231,7 +246,17 @@ void pcistub_put_pci_dev(struct pci_dev *dev)  	/* Cleanup our device  	 * (so it's ready for the next domain)  	 */ + +	/* This is OK - we are running from workqueue context +	 * and want to inhibit the user from fiddling with 'reset' +	 */ +	pci_reset_function(dev); +	pci_restore_state(psdev->dev); + +	/* This disables the device. */  	xen_pcibk_reset_device(found_psdev->dev); + +	/* And cleanup up our emulated fields. */  	xen_pcibk_config_free_dyn_fields(found_psdev->dev);  	xen_pcibk_config_reset_dev(found_psdev->dev); @@ -328,6 +353,16 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)  	if (err)  		goto config_release; +	dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); +	__pci_reset_function_locked(dev); + +	/* We need the device active to save the state. */ +	dev_dbg(&dev->dev, "save state of device\n"); +	pci_save_state(dev); +	dev_data->pci_saved_state = pci_store_saved_state(dev); +	if (!dev_data->pci_saved_state) +		dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); +  	/* Now disable the device (this also ensures some private device  	 * data is setup before we export)  	 */ diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index e9b4011c5f9..a7def010eba 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -41,6 +41,7 @@ struct xen_pcibk_device {  struct xen_pcibk_dev_data {  	struct list_head config_fields; +	struct pci_saved_state *pci_saved_state;  	unsigned int permissive:1;  	unsigned int warned_on_write:1;  	unsigned int enable_intx:1; diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 767ff656d5a..146c9489701 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -488,7 +488,7 @@ static struct attribute *selfballoon_attrs[] = {  	NULL  }; -static struct attribute_group selfballoon_group = { +static const struct attribute_group selfballoon_group = {  	.name = "selfballoon",  	.attrs = selfballoon_attrs  }; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 566d2adbd6e..b3e146edb51 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -569,7 +569,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,  {  	struct gnttab_map_grant_ref op; -	gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, +	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref,  			  dev->otherend_id);  	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) @@ -662,7 +662,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)  			goto found;  		}  	} -	node = NULL; +	node = addr = NULL;   found:  	spin_unlock(&xenbus_valloc_lock); @@ -698,7 +698,7 @@ int xenbus_unmap_ring(struct xenbus_device *dev,  {  	struct gnttab_unmap_grant_ref op; -	
gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); +	gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle);  	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))  		BUG(); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3864967202b..b793723e724 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -257,11 +257,12 @@ int xenbus_dev_remove(struct device *_dev)  	DPRINTK("%s", dev->nodename);  	free_otherend_watch(dev); -	free_otherend_details(dev);  	if (drv->remove)  		drv->remove(dev); +	free_otherend_details(dev); +  	xenbus_switch_state(dev, XenbusStateClosed);  	return 0;  } diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 9c57819df51..f20c5f178b4 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -53,6 +53,12 @@ static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type,  	char *nodename;  	int err; +	/* ignore console/0 */ +	if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) { +		DPRINTK("Ignoring buggy device entry console/0"); +		return 0; +	} +  	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name);  	if (!nodename)  		return -ENOMEM; diff --git a/include/linux/pci.h b/include/linux/pci.h index 27bf521bceb..900da5db60e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -817,6 +817,7 @@ int pcie_set_readrq(struct pci_dev *dev, int rq);  int pcie_get_mps(struct pci_dev *dev);  int pcie_set_mps(struct pci_dev *dev, int mps);  int __pci_reset_function(struct pci_dev *dev); +int __pci_reset_function_locked(struct pci_dev *dev);  int pci_reset_function(struct pci_dev *dev);  void pci_update_resource(struct pci_dev *dev, int resno);  int __must_check pci_assign_resource(struct pci_dev *dev, int i); diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index 1888d8c157e..1b4f923d708 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -90,6 +90,10 @@  /* Boolean: Enable aligning all periodic vpts to reduce interrupts */  #define HVM_PARAM_VPT_ALIGN    16 -#define HVM_NR_PARAMS          17 +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN    17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +#define HVM_NR_PARAMS          19  #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index c1080d9c705..0c28989007f 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -145,6 +145,13 @@ struct physdev_manage_pci {  	uint8_t devfn;  }; +#define PHYSDEVOP_restore_msi            19 +struct physdev_restore_msi { +	/* IN */ +	uint8_t bus; +	uint8_t devfn; +}; +  #define PHYSDEVOP_manage_pci_add_ext	20  struct physdev_manage_pci_ext {  	/* IN */ diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index c1684680431..486653f0dd8 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);  #define XEN_PM_CX   0  #define XEN_PM_PX   1  #define XEN_PM_TX   2 - +#define XEN_PM_PDC  3  /* Px sub info type */  #define XEN_PX_PCT   1  #define XEN_PX_PSS   2 @@ -293,10 +293,27 @@ struct xenpf_set_processor_pminfo {  	union {  		struct xen_processor_power          power;/* Cx: _CST/_CSD */  		struct 
xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */ +		GUEST_HANDLE(uint32_t)              pdc;  	};  };  DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { +	/* IN */ +	uint32_t xen_cpuid; +	/* OUT */ +	/* The maximum cpu_id that is present */ +	uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE   1 +	/* Corresponding xen_cpuid is not present */ +#define XEN_PCPU_FLAGS_INVALID  2 +	uint32_t flags; +	uint32_t apic_id; +	uint32_t acpi_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); +  struct xen_platform_op {  	uint32_t cmd;  	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -312,6 +329,7 @@ struct xen_platform_op {  		struct xenpf_change_freq       change_freq;  		struct xenpf_getidletime       getidletime;  		struct xenpf_set_processor_pminfo set_pminfo; +		struct xenpf_pcpuinfo          pcpu_info;  		uint8_t                        pad[128];  	} u;  };
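
The pci.c and pci_stub.c hunks above export __pci_reset_function_locked() and pair it with pci_store_saved_state()/pci_load_and_free_saved_state() so xen-pciback can hand a cleanly reset function to a guest and put it back in a known state on release. The fragment below is a minimal sketch of that bind/release pattern, not part of the commit: my_dev_data and the two helper names are hypothetical stand-ins for struct xen_pcibk_dev_data and the pcistub init/release paths, with error handling trimmed.

#include <linux/pci.h>
#include <linux/device.h>

struct my_dev_data {
	struct pci_saved_state *pci_saved_state;
};

/* Bind path: reset the function (the device lock is already held by the
 * caller, as in pcistub_init_device), then snapshot the clean config space. */
static int my_stub_init_device(struct pci_dev *dev, struct my_dev_data *data)
{
	__pci_reset_function_locked(dev);

	pci_save_state(dev);
	data->pci_saved_state = pci_store_saved_state(dev);
	if (!data->pci_saved_state)
		dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
	return 0;
}

/* Release path: reset again and replay the bind-time snapshot so the
 * device returns to the host with sane config space. */
static void my_stub_release_device(struct pci_dev *dev, struct my_dev_data *data)
{
	__pci_reset_function_locked(dev);

	if (pci_load_and_free_saved_state(dev, &data->pci_saved_state))
		dev_dbg(&dev->dev, "Could not reload PCI state\n");
	else
		pci_restore_state(dev);
}

In the commit itself the snapshot is taken in pcistub_init_device() and replayed from pcistub_device_release(), while pcistub_put_pci_dev() uses pci_reset_function() plus pci_restore_state() from workqueue context, so a function handed back by a guest has always been through an FLR/D3 reset and a config-space restore.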