diff options
Diffstat (limited to 'arch/x86/xen')
| -rw-r--r-- | arch/x86/xen/enlighten.c | 118 | ||||
| -rw-r--r-- | arch/x86/xen/mmu.c | 2 | ||||
| -rw-r--r-- | arch/x86/xen/p2m.c | 95 | ||||
| -rw-r--r-- | arch/x86/xen/setup.c | 9 | ||||
| -rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
| -rw-r--r-- | arch/x86/xen/xen-ops.h | 2 | 
6 files changed, 114 insertions, 114 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bf4bda6d3e9..9642d4a3860 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -31,7 +31,6 @@  #include <linux/pci.h>  #include <linux/gfp.h>  #include <linux/memblock.h> -#include <linux/syscore_ops.h>  #include <xen/xen.h>  #include <xen/interface/xen.h> @@ -1470,130 +1469,38 @@ asmlinkage void __init xen_start_kernel(void)  #endif  } -#ifdef CONFIG_XEN_PVHVM -/* - * The pfn containing the shared_info is located somewhere in RAM. This - * will cause trouble if the current kernel is doing a kexec boot into a - * new kernel. The new kernel (and its startup code) can not know where - * the pfn is, so it can not reserve the page. The hypervisor will - * continue to update the pfn, and as a result memory corruption occours - * in the new kernel. - * - * One way to work around this issue is to allocate a page in the - * xen-platform pci device's BAR memory range. But pci init is done very - * late and the shared_info page is already in use very early to read - * the pvclock. So moving the pfn from RAM to MMIO is racy because some - * code paths on other vcpus could access the pfn during the small - * window when the old pfn is moved to the new pfn. There is even a - * small window were the old pfn is not backed by a mfn, and during that - * time all reads return -1. - * - * Because it is not known upfront where the MMIO region is located it - * can not be used right from the start in xen_hvm_init_shared_info. - * - * To minimise trouble the move of the pfn is done shortly before kexec. - * This does not eliminate the race because all vcpus are still online - * when the syscore_ops will be called. But hopefully there is no work - * pending at this point in time. Also the syscore_op is run last which - * reduces the risk further. - */ - -static struct shared_info *xen_hvm_shared_info; - -static void xen_hvm_connect_shared_info(unsigned long pfn) +void __ref xen_hvm_init_shared_info(void)  { +	int cpu;  	struct xen_add_to_physmap xatp; +	static struct shared_info *shared_info_page = 0; +	if (!shared_info_page) +		shared_info_page = (struct shared_info *) +			extend_brk(PAGE_SIZE, PAGE_SIZE);  	xatp.domid = DOMID_SELF;  	xatp.idx = 0;  	xatp.space = XENMAPSPACE_shared_info; -	xatp.gpfn = pfn; +	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;  	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))  		BUG(); -} -static void xen_hvm_set_shared_info(struct shared_info *sip) -{ -	int cpu; - -	HYPERVISOR_shared_info = sip; +	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;  	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info  	 * page, we use it in the event channel upcall and in some pvclock  	 * related functions. We don't need the vcpu_info placement  	 * optimizations because we don't use any pv_mmu or pv_irq op on  	 * HVM. -	 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is -	 * online but xen_hvm_set_shared_info is run at resume time too and +	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is +	 * online but xen_hvm_init_shared_info is run at resume time too and  	 * in that case multiple vcpus might be online. */  	for_each_online_cpu(cpu) {  		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];  	}  } -/* Reconnect the shared_info pfn to a mfn */ -void xen_hvm_resume_shared_info(void) -{ -	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); -} - -#ifdef CONFIG_KEXEC -static struct shared_info *xen_hvm_shared_info_kexec; -static unsigned long xen_hvm_shared_info_pfn_kexec; - -/* Remember a pfn in MMIO space for kexec reboot */ -void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) -{ -	xen_hvm_shared_info_kexec = sip; -	xen_hvm_shared_info_pfn_kexec = pfn; -} - -static void xen_hvm_syscore_shutdown(void) -{ -	struct xen_memory_reservation reservation = { -		.domid = DOMID_SELF, -		.nr_extents = 1, -	}; -	unsigned long prev_pfn; -	int rc; - -	if (!xen_hvm_shared_info_kexec) -		return; - -	prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; -	set_xen_guest_handle(reservation.extent_start, &prev_pfn); - -	/* Move pfn to MMIO, disconnects previous pfn from mfn */ -	xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); - -	/* Update pointers, following hypercall is also a memory barrier */ -	xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); - -	/* Allocate new mfn for previous pfn */ -	do { -		rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); -		if (rc == 0) -			msleep(123); -	} while (rc == 0); - -	/* Make sure the previous pfn is really connected to a (new) mfn */ -	BUG_ON(rc != 1); -} - -static struct syscore_ops xen_hvm_syscore_ops = { -	.shutdown = xen_hvm_syscore_shutdown, -}; -#endif - -/* Use a pfn in RAM, may move to MMIO before kexec. */ -static void __init xen_hvm_init_shared_info(void) -{ -	/* Remember pointer for resume */ -	xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); -	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); -	xen_hvm_set_shared_info(xen_hvm_shared_info); -} - +#ifdef CONFIG_XEN_PVHVM  static void __init init_hvm_pv_info(void)  {  	int major, minor; @@ -1644,9 +1551,6 @@ static void __init xen_hvm_guest_init(void)  	init_hvm_pv_info();  	xen_hvm_init_shared_info(); -#ifdef CONFIG_KEXEC -	register_syscore_ops(&xen_hvm_syscore_ops); -#endif  	if (xen_feature(XENFEAT_hvm_callback_vector))  		xen_have_vector_callback = 1; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a76133f4..5141d808e75 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1283,7 +1283,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,  	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));  	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; -	if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { +	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {  		args->op.cmd = MMUEXT_INVLPG_MULTI;  		args->op.arg1.linear_addr = start;  	} diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b2e91d40a4c..76ba0e97e53 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);  /* When we populate back during bootup, the amount of pages can vary. The   * max we have is seen is 395979, but that does not mean it can't be more. - * But some machines can have 3GB I/O holes even. So lets reserve enough - * for 4GB of I/O and E820 holes. */ -RESERVE_BRK(p2m_populated, PMD_SIZE * 4); + * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle + * it can re-use Xen provided mfn_list array, so we only need to allocate at + * most three P2M top nodes. */ +RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); +  static inline unsigned p2m_top_index(unsigned long pfn)  {  	BUG_ON(pfn >= MAX_P2M_PFN); @@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn)  	}  	return true;  } + +/* + * Skim over the P2M tree looking at pages that are either filled with + * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and + * replace the P2M leaf with a p2m_missing or p2m_identity. + * Stick the old page in the new P2M tree location. + */ +bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) +{ +	unsigned topidx; +	unsigned mididx; +	unsigned ident_pfns; +	unsigned inv_pfns; +	unsigned long *p2m; +	unsigned long *mid_mfn_p; +	unsigned idx; +	unsigned long pfn; + +	/* We only look when this entails a P2M middle layer */ +	if (p2m_index(set_pfn)) +		return false; + +	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { +		topidx = p2m_top_index(pfn); + +		if (!p2m_top[topidx]) +			continue; + +		if (p2m_top[topidx] == p2m_mid_missing) +			continue; + +		mididx = p2m_mid_index(pfn); +		p2m = p2m_top[topidx][mididx]; +		if (!p2m) +			continue; + +		if ((p2m == p2m_missing) || (p2m == p2m_identity)) +			continue; + +		if ((unsigned long)p2m == INVALID_P2M_ENTRY) +			continue; + +		ident_pfns = 0; +		inv_pfns = 0; +		for (idx = 0; idx < P2M_PER_PAGE; idx++) { +			/* IDENTITY_PFNs are 1:1 */ +			if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) +				ident_pfns++; +			else if (p2m[idx] == INVALID_P2M_ENTRY) +				inv_pfns++; +			else +				break; +		} +		if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) +			goto found; +	} +	return false; +found: +	/* Found one, replace old with p2m_identity or p2m_missing */ +	p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); +	/* And the other for save/restore.. */ +	mid_mfn_p = p2m_top_mfn_p[topidx]; +	/* NOTE: Even if it is a p2m_identity it should still be point to +	 * a page filled with INVALID_P2M_ENTRY entries. */ +	mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); + +	/* Reset where we want to stick the old page in. */ +	topidx = p2m_top_index(set_pfn); +	mididx = p2m_mid_index(set_pfn); + +	/* This shouldn't happen */ +	if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) +		early_alloc_p2m(set_pfn); + +	if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) +		return false; + +	p2m_init(p2m); +	p2m_top[topidx][mididx] = p2m; +	mid_mfn_p = p2m_top_mfn_p[topidx]; +	mid_mfn_p[mididx] = virt_to_mfn(p2m); + +	return true; +}  bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)  {  	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {  		if (!early_alloc_p2m(pfn))  			return false; +		if (early_can_reuse_p2m_middle(pfn, mfn)) +			return __set_phys_to_machine(pfn, mfn); +  		if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))  			return false; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ead85576d54..d11ca11d14f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size)  	memblock_reserve(start, size);  	xen_max_p2m_pfn = PFN_DOWN(start + size); +	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { +		unsigned long mfn = pfn_to_mfn(pfn); + +		if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) +			continue; +		WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", +			pfn, mfn); -	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)  		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY); +	}  }  static unsigned long __init xen_do_chunk(unsigned long start, diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c39de..45329c8c226 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)  {  #ifdef CONFIG_XEN_PVHVM  	int cpu; -	xen_hvm_resume_shared_info(); +	xen_hvm_init_shared_info();  	xen_callback_vector();  	xen_unplug_emulated_devices();  	if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1e4329e04e0..202d4c15015 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -41,7 +41,7 @@ void xen_enable_syscall(void);  void xen_vcpu_restore(void);  void xen_callback_vector(void); -void xen_hvm_resume_shared_info(void); +void xen_hvm_init_shared_info(void);  void xen_unplug_emulated_devices(void);  void __init xen_build_dynamic_phys_to_machine(void);  |