Diffstat (limited to 'drivers/xen')
| -rw-r--r-- | drivers/xen/Kconfig | 10 |
| -rw-r--r-- | drivers/xen/Makefile | 4 |
| -rw-r--r-- | drivers/xen/balloon.c | 62 |
| -rw-r--r-- | drivers/xen/events.c | 87 |
| -rw-r--r-- | drivers/xen/gntdev.c | 39 |
| -rw-r--r-- | drivers/xen/grant-table.c | 6 |
| -rw-r--r-- | drivers/xen/pci.c | 105 |
| -rw-r--r-- | drivers/xen/swiotlb-xen.c | 70 |
| -rw-r--r-- | drivers/xen/xen-pciback/conf_space.c | 1 |
| -rw-r--r-- | drivers/xen/xen-pciback/conf_space_header.c | 5 |
| -rw-r--r-- | drivers/xen/xen-pciback/conf_space_quirks.c | 3 |
| -rw-r--r-- | drivers/xen/xen-pciback/passthrough.c | 34 |
| -rw-r--r-- | drivers/xen/xen-pciback/pci_stub.c | 35 |
| -rw-r--r-- | drivers/xen/xen-pciback/pciback.h | 32 |
| -rw-r--r-- | drivers/xen/xen-pciback/pciback_ops.c | 1 |
| -rw-r--r-- | drivers/xen/xen-pciback/vpci.c | 35 |
| -rw-r--r-- | drivers/xen/xen-pciback/xenbus.c | 27 |
| -rw-r--r-- | drivers/xen/xen-selfballoon.c | 67 |
| -rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 4 |
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 101 |
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 2 |
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 121 |
| -rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 17 |
23 files changed, 612 insertions, 256 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5f7ff8e2fc1..8795480c235 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -137,16 +137,6 @@ config XEN_GRANT_DEV_ALLOC
 	  to other domains. This can be used to implement frontend drivers
 	  or as part of an inter-domain shared memory channel.
 
-config XEN_PLATFORM_PCI
-	tristate "xen platform pci device driver"
-	depends on XEN_PVHVM && PCI
-	default m
-	help
-	  Driver for the Xen PCI Platform device: it is responsible for
-	  initializing xenbus and grant_table when running in a Xen HVM
-	  domain. As a consequence this driver is required to run any Xen PV
-	  frontend on Xen HVM.
-
 config SWIOTLB_XEN
 	def_bool y
 	depends on PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 72bbb27d7a6..974fffdf22b 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
@@ -23,5 +23,3 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
-
-xen-platform-pci-y			:= platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8ff07..5876e1ae6c2 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: highmem or lowmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
 	int pgno = 0;
 	struct page* page;
 	mutex_lock(&balloon_mutex);
 	while (pgno < nr_pages) {
-		page = balloon_retrieve(true);
-		if (page) {
+		page = balloon_retrieve(highmem);
+		if (page && PageHighMem(page) == highmem) {
 			pages[pgno++] = page;
 		} else {
 			enum bp_state st;
-			st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+			if (page)
+				balloon_append(page);
+			st = decrease_reservation(nr_pages - pgno,
+					highmem ? GFP_HIGHUSER : GFP_USER);
 			if (st != BP_DONE)
 				goto out_undo;
 		}
@@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+				      unsigned long pages)
 {
 	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
+	/*
+	 * If the amount of usable memory has been limited (e.g., with
+	 * the 'mem' command line parameter), don't add pages beyond
+	 * this limit.
+	 */
+	extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+		page = pfn_to_page(pfn);
+		/* totalram_pages and totalhigh_pages do not
+		   include the boot-time balloon extension, so
+		   don't subtract from it. */
+		__balloon_append(page);
+	}
+}
+
+static int __init balloon_init(void)
+{
+	int i;
+
 	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("xen/balloon: Initialising balloon driver.\n");
 
-	balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+	balloon_stats.current_pages = xen_pv_domain()
+		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+		: max_pfn;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -584,24 +611,13 @@ static int __init balloon_init(void)
 #endif
 
 	/*
-	 * Initialise the balloon with excess memory space.  We need
-	 * to make sure we don't add memory which doesn't exist or
-	 * logically exist.  The E820 map can be trimmed to be smaller
-	 * than the amount of physical memory due to the mem= command
-	 * line parameter.  And if this is a 32-bit non-HIGHMEM kernel
-	 * on a system with memory which requires highmem to access,
-	 * don't try to use it.
+	 * Initialize the balloon with pages from the extra memory
+	 * regions (see arch/x86/xen/setup.c).
 	 */
-	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-	for (pfn = PFN_UP(xen_extra_mem_start);
-	     pfn < extra_pfn_end;
-	     pfn++) {
-		page = pfn_to_page(pfn);
-		/* totalram_pages and totalhigh_pages do not include the boot-time
-		   balloon extension, so don't subtract from it. */
-		__balloon_append(page);
-	}
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+		if (xen_extra_mem[i].size)
+			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+					   PFN_DOWN(xen_extra_mem[i].size));
 
 	return 0;
 }
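The balloon.c hunks above change the alloc_xenballooned_pages() API: callers now choose between highmem and lowmem pages, and a page of the wrong kind coming back from balloon_retrieve() is re-queued instead of handed out. A minimal caller sketch under that new signature (kernel context assumed; the helper and count are illustrative, not part of the commit):

```c
/* Sketch: a driver that must reach its pages through kernel page
 * tables asks for lowmem explicitly, as gntdev does later in this
 * diff.  Kernel context assumed; not a standalone program. */
static int demo_get_lowmem_pages(struct page **pages, int count)
{
	int err;

	/* false => lowmem only; PageHighMem() is false for each page */
	err = alloc_xenballooned_pages(count, pages, false);
	if (err)
		return err;

	/* ... hand the pages to grant-table code ... */

	free_xenballooned_pages(count, pages);
	return 0;
}
```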
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index da70f5c32eb..7a55b292bf3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
 
 static LIST_HEAD(xen_irq_list_head);
 
@@ -432,7 +432,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
 
 	irq = irq_alloc_desc_from(first, -1);
 
-	xen_irq_init(irq);
+	if (irq >= 0)
+		xen_irq_init(irq);
 
 	return irq;
 }
@@ -631,7 +632,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	int irq = -1;
 	struct physdev_irq irq_op;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = find_irq_by_gsi(gsi);
 	if (irq != -1) {
@@ -684,7 +685,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 				handle_edge_irq, name);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -710,10 +711,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 {
 	int irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
-	if (irq == -1)
+	if (irq < 0)
 		goto out;
 
 	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -724,12 +725,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	if (ret < 0)
 		goto error_irq;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 error_irq:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
-	return -1;
+	return ret;
 }
 #endif
 
@@ -740,7 +741,7 @@ int xen_destroy_irq(int irq)
 	struct irq_info *info = info_for_irq(irq);
 	int rc = -ENOENT;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	desc = irq_to_desc(irq);
 	if (!desc)
@@ -766,7 +767,7 @@ int xen_destroy_irq(int irq)
 	xen_free_irq(irq);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return rc;
 }
 
@@ -776,10 +777,10 @@ int xen_irq_from_pirq(unsigned pirq)
 
 	struct irq_info *info;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
-		if (info == NULL || info->type != IRQT_PIRQ)
+		if (info->type != IRQT_PIRQ)
 			continue;
 		irq = info->irq;
 		if (info->u.pirq.pirq == pirq)
@@ -787,7 +788,7 @@ int xen_irq_from_pirq(unsigned pirq)
 	}
 	irq = -1;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -802,7 +803,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = evtchn_to_irq[evtchn];
 
@@ -818,7 +819,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -829,7 +830,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	struct evtchn_bind_ipi bind_ipi;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
 
@@ -853,7 +854,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	}
 
  out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
@@ -872,13 +873,34 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(virq_to_irq, cpu)[virq];
 
@@ -892,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 
@@ -903,7 +931,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -913,7 +941,7 @@ static void unbind_from_irq(unsigned int irq)
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
@@ -943,7 +971,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 }
 
 int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1279,7 +1307,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	   will also be masked. */
 	disable_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	/* After resume the irq<->evtchn mappings are all cleared out */
 	BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1289,7 +1317,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
 	xen_irq_info_evtchn_init(irq, evtchn);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
 	irq_set_affinity(irq, cpumask_of(0));
@@ -1670,6 +1698,7 @@ void __init xen_init_IRQ(void)
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
+	BUG_ON(!evtchn_to_irq);
 	for (i = 0; i < NR_EVENT_CHANNELS; i++)
 		evtchn_to_irq[i] = -1;
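Two related things happen in events.c: irq_mapping_update_lock becomes a mutex, which is what lets bind_virq_to_irq() issue the potentially slow EVTCHNOP_status scan in find_virq() while holding it, and a -EEXIST from EVTCHNOP_bind_virq (as happens when a previous kexec'd kernel left the VIRQ bound) is now recovered by locating the existing port. For reference, a typical consumer of this path (kernel context assumed; the handler and names are illustrative):

```c
/* Sketch: binding a VIRQ through the public helper; after this
 * commit the bind survives a VIRQ that the hypervisor still
 * considers bound from before a kexec. */
static irqreturn_t demo_virq_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int demo_bind_virq(unsigned int cpu)
{
	int irq = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
					  demo_virq_handler, 0,
					  "demo-virq", NULL);
	return irq < 0 ? irq : 0;
}
```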
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26cf0c..880798aae2f 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@ struct grant_map {
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
+	struct gnttab_map_grant_ref   *kmap_ops;
 	struct page **pages;
 };
 
@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	add->grants    = kzalloc(sizeof(add->grants[0])    * count, GFP_KERNEL);
 	add->map_ops   = kzalloc(sizeof(add->map_ops[0])   * count, GFP_KERNEL);
 	add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
+	add->kmap_ops  = kzalloc(sizeof(add->kmap_ops[0])  * count, GFP_KERNEL);
 	add->pages     = kzalloc(sizeof(add->pages[0])     * count, GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
+	    NULL == add->kmap_ops  ||
 	    NULL == add->pages)
 		goto err;
 
-	if (alloc_xenballooned_pages(count, add->pages))
+	if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
 		goto err;
 
 	for (i = 0; i < count; i++) {
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
+		add->kmap_ops[i].handle = -1;
 	}
 
 	add->index = 0;
@@ -142,6 +146,7 @@ err:
 	kfree(add->grants);
 	kfree(add->map_ops);
 	kfree(add->unmap_ops);
+	kfree(add->kmap_ops);
 	kfree(add);
 	return NULL;
 }
@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map)
 			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
 				map->flags, -1 /* handle */);
 		}
+	} else {
+		/*
+		 * Setup the map_ops corresponding to the pte entries pointing
+		 * to the kernel linear addresses of the struct pages.
+		 * These ptes are completely different from the user ptes dealt
+		 * with find_grant_ptes.
+		 */
+		for (i = 0; i < map->count; i++) {
+			unsigned level;
+			unsigned long address = (unsigned long)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			pte_t *ptep;
+			u64 pte_maddr = 0;
+			BUG_ON(PageHighMem(map->pages[i]));
+
+			ptep = lookup_address(address, &level);
+			pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+			gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+				map->flags |
+				GNTMAP_host_map |
+				GNTMAP_contains_pte,
+				map->grants[i].ref,
+				map->grants[i].domid);
+		}
 	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
-	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+	err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+			map->pages, map->count);
 	if (err)
 		return err;
 
@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 
 	pr_debug("priv %p\n", priv);
 
-	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
 		list_del(&map->next);
 		gntdev_put_map(map);
 	}
-	spin_unlock(&priv->lock);
 
 	if (use_ptemod)
 		mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 	if (map) {
 		list_del(&map->next);
-		gntdev_put_map(map);
 		err = 0;
 	}
 	spin_unlock(&priv->lock);
+	if (map)
+		gntdev_put_map(map);
 	return err;
 }
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b347b24..8c71ab80175 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -448,7 +448,8 @@ unsigned int gnttab_max_grant_frames(void)
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-		    struct page **pages, unsigned int count)
+			struct gnttab_map_grant_ref *kmap_ops,
+			struct page **pages, unsigned int count)
 {
 	int i, ret;
 	pte_t *pte;
@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 			 */
 			return -EOPNOTSUPP;
 		}
-		ret = m2p_add_override(mfn, pages[i],
-				       map_ops[i].flags & GNTMAP_contains_pte);
+		ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
 		if (ret)
 			return ret;
 	}
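With the gntdev and grant-table hunks above, gnttab_map_refs() grows a second op array describing kernel-side ptes (kmap_ops), which m2p_add_override() now consumes whole instead of a single flags bit. A hedged sketch of the new call shape for a caller that needs no kernel pte tracking (kernel context assumed; the helper name and error handling are illustrative):

```c
/* Sketch: mapping one grant into a ballooned lowmem page with the
 * new two-array signature; kmap_ops may be NULL when no kernel pte
 * override is required. */
static int demo_map_one_grant(grant_ref_t ref, domid_t domid,
			      struct page **page)
{
	struct gnttab_map_grant_ref op;
	int err;

	err = alloc_xenballooned_pages(1, page, false);
	if (err)
		return err;

	gnttab_set_map_op(&op,
			  (unsigned long)pfn_to_kaddr(page_to_pfn(*page)),
			  GNTMAP_host_map, ref, domid);

	err = gnttab_map_refs(&op, NULL /* kmap_ops */, page, 1);
	if (err || op.status != GNTST_okay) {
		free_xenballooned_pages(1, page);
		return err ? err : -EINVAL;
	}
	return 0;
}
```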
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc07d..66057075d6e 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
 
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
+
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+#ifdef CONFIG_PCI_IOV
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
 			.is_virtfn 	= 1,
-			.physfn.bus	= pci_dev->physfn->bus->number,
-			.physfn.devfn	= pci_dev->physfn->devfn,
+			.physfn.bus	= physfn->bus->number,
+			.physfn.devfn	= physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 			&manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
-
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-		&manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
 		r = xen_remove_device(dev);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-
-	return r;
+	if (r)
+		dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+			action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+			(action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+	return NOTIFY_OK;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
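xen_add_device() and xen_remove_device() now probe the segment-aware PHYSDEVOP_pci_device_add/remove hypercalls and permanently fall back to the legacy manage_pci ops on -ENOSYS, so non-zero PCI domains only work on hypervisors that know the new interface. A standalone model of that sticky-fallback idiom (new_op()/old_op() are stand-ins, not kernel functions):

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool new_op_supported = true;

static int new_op(void) { return -ENOSYS; } /* pretend: old hypervisor */
static int old_op(void) { return 0; }

static int add_device(void)
{
	if (new_op_supported) {
		int r = new_op();
		if (r != -ENOSYS)
			return r;		/* new interface handled it */
		new_op_supported = false;	/* never probe again */
	}
	return old_op();
}

int main(void)
{
	printf("first add:  %d\n", add_device());
	printf("second add: %d (went straight to the legacy op)\n",
	       add_device());
	return 0;
}
```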
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a2320..c984768d98c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,6 +38,7 @@
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 void __init xen_swiotlb_init(int verbose)
 {
 	unsigned long bytes;
-	int rc;
+	int rc = -ENOMEM;
 	unsigned long nr_tbl;
+	char *m = NULL;
+	unsigned int repeat = 3;
 
 	nr_tbl = swioltb_nr_tbl();
 	if (nr_tbl)
@@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose)
 		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
 		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-
+retry:
 	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
 	xen_io_tlb_start = alloc_bootmem(bytes);
-	if (!xen_io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-
+	if (!xen_io_tlb_start) {
+		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		goto error;
+	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
 	 * And replace that memory with pages under 4GB.
@@ -173,17 +177,28 @@
 	 */
 	rc = xen_swiotlb_fixup(xen_io_tlb_start,
 			       bytes,
 			       xen_io_tlb_nslabs);
-	if (rc)
+	if (rc) {
+		free_bootmem(__pa(xen_io_tlb_start), bytes);
+		m = "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    "is too fragmented!";
 		goto error;
-
+	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
 
 	return;
 error:
-	panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-	      "We either don't have the permission or you do not have enough"\
-	      "free memory under 4GB!\n", rc);
+	if (repeat--) {
+		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs >> 1));
+		printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+		goto retry;
+	}
+	xen_raw_printk("%s (rc:%d)", m, rc);
+	panic("%s (rc:%d)", m, rc);
 }
 
 void *
@@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
 	unsigned long vstart;
+	phys_addr_t phys;
+	dma_addr_t dev_addr;
 
 	/*
 	* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 
 	vstart = __get_free_pages(flags, order);
 	ret = (void *)vstart;
 
+	if (!ret)
+		return ret;
+
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
-	if (ret) {
+	phys = virt_to_phys(ret);
+	dev_addr = xen_phys_to_bus(phys);
+	if (((dev_addr + size - 1 <= dma_mask)) &&
+	    !range_straddles_page_boundary(phys, size))
+		*dma_handle = dev_addr;
+	else {
 		if (xen_create_contiguous_region(vstart, order,
 						 fls64(dma_mask)) != 0) {
 			free_pages(vstart, order);
 			return NULL;
 		}
-		memset(ret, 0, size);
 		*dma_handle = virt_to_machine(ret).maddr;
 	}
+	memset(ret, 0, size);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 			  dma_addr_t dev_addr)
 {
 	int order = get_order(size);
+	phys_addr_t phys;
+	u64 dma_mask = DMA_BIT_MASK(32);
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
 
-	xen_destroy_contiguous_region((unsigned long)vaddr, order);
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(vaddr);
+
+	if (((dev_addr + size - 1 > dma_mask)) ||
+	    range_straddles_page_boundary(phys, size))
+		xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
-
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = 0;
+	}
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
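Instead of panicking on the first failure, xen_swiotlb_init() now frees the partial allocation, halves the slab count (never below 1024 slabs, i.e. 2MB) and retries up to three times before giving up. A standalone model of that shrinking retry loop (the allocation failure is simulated):

```c
#include <stdio.h>

#define IO_TLB_SHIFT 11			/* 2KB slabs, as in the kernel */

int main(void)
{
	unsigned long nslabs = (64 * 1024 * 1024) >> IO_TLB_SHIFT;
	unsigned int repeat = 3;

	while (1) {
		int ok = 0;		/* pretend every attempt fails */
		if (ok)
			break;
		if (!repeat--)
			break;		/* the kernel panics at this point */
		nslabs >>= 1;
		if (nslabs < 1024UL)
			nslabs = 1024UL;	/* min is 2MB */
		printf("Xen-SWIOTLB: Lowering to %luMB\n",
		       (nslabs << IO_TLB_SHIFT) >> 20);
	}
	return 0;
}
```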
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index a8031445d94..444345afbd5 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -15,7 +15,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
 static int permissive;
 module_param(permissive, bool, 0644);
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index da3cbdfcb5d..3daf862d739 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -15,7 +15,6 @@ struct pci_bar_info {
 	int which;
 };
 
-#define DRV_NAME	"xen-pciback"
 #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
 #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
 
@@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
 	int ret;
 
 	ret = xen_pcibk_read_config_word(dev, offset, value, data);
-	if (!atomic_read(&dev->enable_cnt))
+	if (!pci_is_enabled(dev))
 		return ret;
 
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev,
 
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = res[pos].end - res[pos].start + 1;
+	bar_info->len_val = resource_size(&res[pos]);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index 921a889e65e..7476791cab4 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -12,7 +12,6 @@
 #include "conf_space_quirks.h"
 
 LIST_HEAD(xen_pcibk_quirks);
-#define	DRV_NAME	"xen-pciback"
 static inline const struct pci_device_id *
 match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
 {
@@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
 			goto out;
 	tmp_quirk = NULL;
 	printk(KERN_DEBUG DRV_NAME
-	       ":quirk didn't match any device xen_pciback knows about\n");
+	       ": quirk didn't match any device known\n");
 out:
 	return tmp_quirk;
 }
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 1d32a9a42c0..828dddc360d 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -7,13 +7,13 @@
 #include <linux/list.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 struct passthrough_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list;
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
 	struct pci_dev *dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	return dev;
 }
@@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 {
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
-	unsigned long flags;
 	unsigned int domain, bus, devfn;
 	int err;
 
@@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 		return -ENOMEM;
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 	list_add_tail(&dev_entry->list, &dev_data->dev_list);
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	/* Publish this device. */
 	domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry, *t;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
 		if (dev_entry->dev == dev) {
@@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 	if (!dev_data)
 		return -ENOMEM;
 
-	spin_lock_init(&dev_data->lock);
+	mutex_init(&dev_data->lock);
 
 	INIT_LIST_HEAD(&dev_data->dev_list);
 
@@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 {
 	int err = 0;
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *e, *tmp;
+	struct pci_dev_entry *dev_entry, *e;
 	struct pci_dev *dev;
 	int found;
 	unsigned int domain, bus;
 
-	spin_lock(&dev_data->lock);
+	mutex_lock(&dev_data->lock);
 
-	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		/* Only publish this device as a root if none of its
 		 * parent bridges are exported
 		 */
@@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 		bus = (unsigned int)dev_entry->dev->bus->number;
 
 		if (!found) {
-			spin_unlock(&dev_data->lock);
 			err = publish_root_cb(pdev, domain, bus);
 			if (err)
 				break;
-			spin_lock(&dev_data->lock);
 		}
 	}
 
-	if (!err)
-		spin_unlock(&dev_data->lock);
+	mutex_unlock(&dev_data->lock);
 
 	return err;
 }
@@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 	return 1;
 }
 
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
 	.name           = "passthrough",
 	.init           = __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,
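Among the xen-pciback cleanups above, read_dev_bar() switches from the open-coded end - start + 1 to resource_size(), and the per-file DRV_NAME defines are consolidated into pciback.h. A standalone check that the two BAR-length forms agree (struct resource is reduced here to the two fields that matter):

```c
#include <stdio.h>

struct resource { unsigned long start, end; };

/* Same definition the kernel helper uses. */
static unsigned long resource_size(const struct resource *res)
{
	return res->end - res->start + 1;
}

int main(void)
{
	struct resource bar = { .start = 0xfebf0000UL, .end = 0xfebf0fffUL };

	printf("open-coded: 0x%lx\n", bar.end - bar.start + 1);
	printf("helper:     0x%lx\n", resource_size(&bar));
	return 0;
}
```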
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index aec214ac0a1..8f06e1ed028 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,8 +21,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
-
 static char *pci_devs_to_hide;
 wait_queue_head_t xen_pcibk_aer_wait_queue;
 /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
@@ -222,6 +220,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
 	}
 
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	if (WARN_ON(!found_psdev))
+		return;
 
 	/*hold this lock for avoiding breaking link between
 	* pcistub and xen_pcibk when AER is in processing
@@ -514,12 +514,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
 	int err;
 	char nodename[PCI_NODENAME_MAX];
 
-	if (!psdev)
-		dev_err(&psdev->dev->dev,
-			"device is NULL when do AER recovery/kill_domain\n");
+	BUG_ON(!psdev);
 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
 		psdev->pdev->xdev->otherend_id);
-	nodename[strlen(nodename)] = '\0';
 
 again:
 	err = xenbus_transaction_start(&xbt);
@@ -605,7 +602,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
 	if (test_bit(_XEN_PCIF_active,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_dbg(&psdev->dev->dev,
-			"schedule pci_conf service in xen_pcibk\n");
+			"schedule pci_conf service in " DRV_NAME "\n");
 		xen_pcibk_test_and_schedule_op(psdev->pdev);
 	}
 
@@ -995,8 +992,7 @@ out:
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
 
 static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
 				   size_t count)
@@ -1015,8 +1011,7 @@ out:
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
 
 static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 {
@@ -1039,8 +1034,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 
 	return count;
 }
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
 
 static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
 {
@@ -1069,8 +1063,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
 
 static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
 					  const char *buf,
@@ -1106,7 +1099,8 @@ out:
 		err = count;
 	return err;
 }
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL,
+		   pcistub_irq_handler_switch);
 
 static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
 				 size_t count)
@@ -1170,8 +1164,8 @@ out:
 
 	return count;
 }
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show,
+		   pcistub_quirk_add);
 
 static ssize_t permissive_add(struct device_driver *drv, const char *buf,
 			      size_t count)
@@ -1236,8 +1230,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show,
+		   permissive_add);
 
 static void pcistub_exit(void)
 {
@@ -1374,3 +1368,4 @@ module_init(xen_pcibk_init);
 module_exit(xen_pcibk_cleanup);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index a0e131a8150..e9b4011c5f9 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -15,6 +15,8 @@
 #include <linux/atomic.h>
 #include <xen/interface/io/pciif.h>
 
+#define DRV_NAME	"xen-pciback"
+
 struct pci_dev_entry {
 	struct list_head list;
 	struct pci_dev *dev;
@@ -27,7 +29,7 @@ struct pci_dev_entry {
 struct xen_pcibk_device {
 	void *pci_dev_data;
-	spinlock_t dev_lock;
+	struct mutex dev_lock;
 	struct xenbus_device *xdev;
 	struct xenbus_watch be_watch;
 	u8 be_watching;
@@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
  *  passthrough - BDFs are exactly like in the host.
 */
 struct xen_pcibk_backend {
-	char *name;
+	const char *name;
 	int (*init)(struct xen_pcibk_device *pdev);
 	void (*free)(struct xen_pcibk_device *pdev);
 	int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -104,9 +106,9 @@ struct xen_pcibk_backend {
 			       unsigned int devfn);
 };
 
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
 
 static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 					struct pci_dev *dev,
@@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 	if (xen_pcibk_backend && xen_pcibk_backend->add)
 		return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 					     struct pci_dev *dev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->release(pdev, dev);
-};
+}
 
 static inline struct pci_dev *
 xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
 	if (xen_pcibk_backend && xen_pcibk_backend->get)
 		return xen_pcibk_backend->get(pdev, domain, bus, devfn);
 	return NULL;
-};
+}
+
 /**
* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
* before sending aer request to pcifront, so that guest could identify
@@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 		return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
 					       devfn);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->init)
 		return xen_pcibk_backend->init(pdev);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 					      publish_pci_root_cb cb)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->publish)
 		return xen_pcibk_backend->publish(pdev, cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->free(pdev);
-};
+}
+
 /* Handles events from front-end */
 irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
 void xen_pcibk_do_op(struct work_struct *data);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 8c95c3415b7..63616d7453e 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -10,7 +10,6 @@
 #include <linux/sched.h>
 #include "pciback.h"
 
-#define DRV_NAME	"xen-pciback"
 int verbose_request;
 module_param(verbose_request, int, 0644);
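pciback.h now exposes the two backends as const vtables behind a single xen_pcibk_backend pointer, with every inline dispatcher tolerating a missing backend or missing hook. A standalone model of that pattern (names are stand-ins for the kernel structs):

```c
#include <stdio.h>

struct backend_ops {
	const char *name;
	int (*init)(void);
};

static int vpci_init(void) { return 0; }

/* const: the ops table itself can live in read-only data */
static const struct backend_ops vpci_backend = {
	.name = "vpci",
	.init = vpci_init,
};

static const struct backend_ops *active;

static int backend_init(void)
{
	if (active && active->init)	/* NULL-tolerant dispatch */
		return active->init();
	return -1;
}

int main(void)
{
	printf("no backend:   %d\n", backend_init());
	active = &vpci_backend;
	printf("%s backend: %d\n", active->name, backend_init());
	return 0;
}
```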
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4a42cfb0959..46d140baebd 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -8,16 +8,15 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 #define PCI_SLOT_MAX 32
-#define DRV_NAME	"xen-pciback"
 
 struct vpci_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list[PCI_SLOT_MAX];
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static inline struct list_head *list_first(struct list_head *head)
@@ -33,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if (domain != 0 || bus != 0)
 		return NULL;
 
 	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-		spin_lock_irqsave(&vpci_dev->lock, flags);
+		mutex_lock(&vpci_dev->lock);
 
 		list_for_each_entry(entry,
 				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 			}
 		}
 
-		spin_unlock_irqrestore(&vpci_dev->lock, flags);
+		mutex_unlock(&vpci_dev->lock);
 	}
 	return dev;
 }
@@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 	int err = 0, slot, func = -1;
 	struct pci_dev_entry *t, *dev_entry;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
 		err = -EFAULT;
@@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	/* Keep multi-function devices together on the virtual PCI bus */
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
@@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 			 "No more space on root virtual PCI bus");
 
 unlock:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	/* Publish this device. */
 	if (!err)
@@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	int slot;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		struct pci_dev_entry *e, *tmp;
-		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-					 list) {
+		struct pci_dev_entry *e;
+
+		list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
 			if (e->dev == dev) {
 				list_del(&e->list);
 				found_dev = e->dev;
@@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	}
 
 out:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 	if (!vpci_dev)
 		return -ENOMEM;
 
-	spin_lock_init(&vpci_dev->lock);
+	mutex_init(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
 		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 	int found = 0, slot;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
 		list_for_each_entry(entry,
 			    &vpci_dev->dev_list[slot],
@@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 			}
 		}
 	}
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 	return found;
 }
 
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
 	.name		= "vpci",
 	.init		= __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 978d2c6f5dc..474d52ec337 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -13,7 +13,6 @@
 #include <asm/xen/pci.h>
 #include "pciback.h"
 
-#define	DRV_NAME	"xen-pciback"
 #define INVALID_EVTCHN_IRQ  (-1)
 struct workqueue_struct *xen_pcibk_wq;
 
@@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
 	pdev->xdev = xdev;
 	dev_set_drvdata(&xdev->dev, pdev);
 
-	spin_lock_init(&pdev->dev_lock);
+	mutex_init(&pdev->dev_lock);
 
 	pdev->sh_info = NULL;
 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -62,14 +61,12 @@ out:
 
 static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
 {
-	spin_lock(&pdev->dev_lock);
-
+	mutex_lock(&pdev->dev_lock);
 	/* Ensure the guest can't trigger our handler before removing devices */
 	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
 		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
 		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
 	}
-	spin_unlock(&pdev->dev_lock);
 
 	/* If the driver domain started an op, make sure we complete it
 	 * before releasing the shared memory */
@@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
 	/* Note, the workqueue does not use spinlocks at all.*/
 	flush_workqueue(xen_pcibk_wq);
 
-	spin_lock(&pdev->dev_lock);
 	if (pdev->sh_info != NULL) {
 		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
 		pdev->sh_info = NULL;
 	}
-	spin_unlock(&pdev->dev_lock);
-
+	mutex_unlock(&pdev->dev_lock);
 }
 
 static void free_pdev(struct xen_pcibk_device *pdev)
@@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
 		goto out;
 	}
 
-	spin_lock(&pdev->dev_lock);
 	pdev->sh_info = vaddr;
-	spin_unlock(&pdev->dev_lock);
 
 	err = bind_interdomain_evtchn_to_irqhandler(
 		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
 				 "Error binding event channel to IRQ");
 		goto out;
 	}
-
-	spin_lock(&pdev->dev_lock);
 	pdev->evtchn_irq = err;
-	spin_unlock(&pdev->dev_lock);
 	err = 0;
 
 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 	char *magic = NULL;
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only do this setup once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateInitialised)
@@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
 		xenbus_dev_fatal(pdev->xdev, -EFAULT,
 				 "version mismatch (%s/%s) with pcifront - "
-				 "halting xen_pcibk",
+				 "halting " DRV_NAME,
 				 magic, XEN_PCI_MAGIC);
 		goto out;
 	}
@@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 
 	kfree(magic);
 
@@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 
 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only reconfigure once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateReconfiguring)
@@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 	}
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	return 0;
 }
 
@@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
 	char dev_str[64];
 	char state_str[64];
 
+	mutex_lock(&pdev->dev_lock);
 	/* It's possible we could get the call to setup twice, so make sure
 	 * we're not already connected.
	 */
@@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
 				 "Error switching to initialised state!");
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	if (!err)
 		/* see if pcifront is already configured (if not, we'll wait) */
 		xen_pcibk_attach(pdev);
-
 	return err;
 }
 
@@ -724,7 +719,7 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = {
 	.otherend_changed	= xen_pcibk_frontend_changed,
 };
 
-struct xen_pcibk_backend *xen_pcibk_backend;
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
 
 int __init xen_pcibk_xenbus_register(void)
 {
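Note the change of lock scope above: instead of taking dev_lock briefly around individual field stores, the (now) mutex is held across each whole read-state-then-act sequence in attach/reconfigure/setup, closing the window between the xenbus state check and the work that depends on it. A standalone pthread model of the resulting check-then-act pattern (states are arbitrary integers here):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;
static int state = 1;			/* 1 = Initialised, 2 = Connected */

static void attach_once(void)
{
	pthread_mutex_lock(&dev_lock);
	if (state == 1) {
		/* ... map shared ring, bind event channel ... */
		state = 2;	/* check and transition are atomic */
	}
	pthread_mutex_unlock(&dev_lock);
}

int main(void)
{
	attach_once();
	attach_once();		/* second call sees Connected: no-op */
	printf("state = %d\n", state);
	return 0;
}
```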
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 6ea852e2516..d93c70857e0 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,6 +68,8 @@
 */
 
 #include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/module.h>
@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
 /* In HZ, controls frequency of worker invocation. */
 static unsigned int selfballoon_interval __read_mostly = 5;
 
+/*
+ * Minimum usable RAM in MB for selfballooning target for balloon.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum.  If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages.  Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
 static void selfballoon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
 
@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s)
 __setup("selfballooning", xen_selfballooning_setup);
 #endif /* CONFIG_FRONTSWAP */
 
+#define MB2PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
+
 /*
  * Use current balloon size, the goal (vm_committed_as), and hysteresis
  * parameters to set a new target balloon size
 */
 static void selfballoon_process(struct work_struct *work)
 {
-	unsigned long cur_pages, goal_pages, tgt_pages;
+	unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+	unsigned long useful_pages;
 	bool reset_timer = false;
 
 	if (xen_selfballooning_enabled) {
-		cur_pages = balloon_stats.current_pages;
+		cur_pages = totalram_pages;
 		tgt_pages = cur_pages; /* default is no change */
 		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
-			balloon_stats.current_pages - totalram_pages;
+				totalreserve_pages;
 #ifdef CONFIG_FRONTSWAP
 		/* allow space for frontswap pages to be repatriated */
 		if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work)
 				((goal_pages - cur_pages) /
 				  selfballoon_uphysteresis);
 		/* else if cur_pages == goal_pages, no change */
-		balloon_set_new_target(tgt_pages);
+		useful_pages = max_pfn - totalreserve_pages;
+		if (selfballoon_min_usable_mb != 0)
+			floor_pages = totalreserve_pages +
+					MB2PAGES(selfballoon_min_usable_mb);
+		/* piecewise linear function ending in ~3% slope */
+		else if (useful_pages < MB2PAGES(16))
+			floor_pages = max_pfn; /* not worth ballooning */
+		else if (useful_pages < MB2PAGES(64))
+			floor_pages = totalreserve_pages + MB2PAGES(16) +
+					((useful_pages - MB2PAGES(16)) >> 1);
+		else if (useful_pages < MB2PAGES(512))
+			floor_pages = totalreserve_pages + MB2PAGES(40) +
+					((useful_pages - MB2PAGES(40)) >> 3);
+		else /* useful_pages >= MB2PAGES(512) */
+			floor_pages = totalreserve_pages + MB2PAGES(99) +
+					((useful_pages - MB2PAGES(99)) >> 5);
+		if (tgt_pages < floor_pages)
+			tgt_pages = floor_pages;
+		balloon_set_new_target(tgt_pages +
+			balloon_stats.current_pages - totalram_pages);
 		reset_timer = true;
 	}
 #ifdef CONFIG_FRONTSWAP
@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
 static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
 		   show_selfballoon_uphys, store_selfballoon_uphys);
 
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+				selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
+					       struct sysdev_attribute *attr,
+					       const char *buf,
+					       size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	err = strict_strtoul(buf, 10, &val);
+	if (err || val == 0)
+		return -EINVAL;
+	selfballoon_min_usable_mb = val;
+	return count;
+}
+
+static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+		   show_selfballoon_min_usable_mb,
+		   store_selfballoon_min_usable_mb);
+
+
 #ifdef CONFIG_FRONTSWAP
 SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
 
@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = {
 	&attr_selfballoon_interval.attr,
 	&attr_selfballoon_downhysteresis.attr,
 	&attr_selfballoon_uphysteresis.attr,
+	&attr_selfballoon_min_usable_mb.attr,
 #ifdef CONFIG_FRONTSWAP
 	&attr_frontswap_selfshrinking.attr,
 	&attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61ee8fd..2eff7a6aaa2 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
 		printk(KERN_WARNING "XENBUS response ring is not quiescent "
 		       "(%08x:%08x): fixing up\n",
 		       intf->rsp_cons, intf->rsp_prod);
-		intf->rsp_cons = intf->rsp_prod;
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
 	}
 
 	if (xenbus_irq) {
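The selfballooning floor above is worth restating: with selfballoon_min_usable_mb unset, guests with under 16MB of useful memory are not ballooned at all, and larger guests keep a piecewise-linear reserve that tapers to roughly a 3% slope. A standalone reimplementation of that function, assuming 4KB pages so MB2PAGES(1) == 256:

```c
#include <stdio.h>

#define PAGE_SHIFT 12				/* assume 4KB pages */
#define MB2PAGES(mb) ((unsigned long)(mb) << (20 - PAGE_SHIFT))

static unsigned long floor_pages(unsigned long max_pfn,
				 unsigned long totalreserve_pages)
{
	unsigned long useful = max_pfn - totalreserve_pages;

	if (useful < MB2PAGES(16))
		return max_pfn;			/* not worth ballooning */
	if (useful < MB2PAGES(64))
		return totalreserve_pages + MB2PAGES(16) +
		       ((useful - MB2PAGES(16)) >> 1);
	if (useful < MB2PAGES(512))
		return totalreserve_pages + MB2PAGES(40) +
		       ((useful - MB2PAGES(40)) >> 3);
	return totalreserve_pages + MB2PAGES(99) +	/* ~3% slope */
	       ((useful - MB2PAGES(99)) >> 5);
}

int main(void)
{
	unsigned long reserve = MB2PAGES(64);		/* 64MB reserved */
	unsigned long pfns = MB2PAGES(1024);		/* 1GB guest */
	unsigned long fl = floor_pages(pfns, reserve);

	printf("floor: %lu pages (%lu MB)\n", fl, fl >> (20 - PAGE_SHIFT));
	return 0;
}
```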
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c9ac8..cef9b0bf63d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void)
 
 device_initcall(xenbus_probe_initcall);
 
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
 {
 	int err = 0;
 	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
 
-	DPRINTK("");
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
 
-	err = -ENODEV;
-	if (!xen_domain())
-		return err;
+	xen_store_mfn = xen_start_info->store_mfn =
+		pfn_to_mfn(virt_to_phys((void *)page) >>
+			   PAGE_SHIFT);
 
-	/*
-	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
-	 */
-	if (xen_initial_domain()) {
-		struct evtchn_alloc_unbound alloc_unbound;
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
 
-		/* Allocate Xenstore page */
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			goto out_error;
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
 
-		xen_store_mfn = xen_start_info->store_mfn =
-			pfn_to_mfn(virt_to_phys((void *)page) >>
-				   PAGE_SHIFT);
+	BUG_ON(err);
+	xen_store_evtchn = xen_start_info->store_evtchn =
+		alloc_unbound.port;
 
-		/* Next allocate a local port which xenstored can bind to */
-		alloc_unbound.dom        = DOMID_SELF;
-		alloc_unbound.remote_dom = 0;
+	return 0;
 
-		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-						  &alloc_unbound);
-		if (err == -ENOSYS)
-			goto out_error;
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
 
-		BUG_ON(err);
-		xen_store_evtchn = xen_start_info->store_evtchn =
-			alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+	int err = 0;
 
-		xen_store_interface = mfn_to_virt(xen_store_mfn);
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
 	} else {
-		if (xen_hvm_domain()) {
-			uint64_t v = 0;
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto out_error;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		if (xen_store_evtchn)
+			xenstored_ready = 1;
+		else {
+			err = xenstored_local_init();
 			if (err)
 				goto out_error;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-		} else {
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
-			xenstored_ready = 1;
 		}
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	}
 
 	/* Initialize the interface to xenstore. */
@@ -760,12 +770,7 @@ static int __init xenbus_init(void)
 	proc_mkdir("xen", NULL);
 #endif
 
-	return 0;
-
-  out_error:
-	if (page != 0)
-		free_page(page);
-
+ out_error:
 	return err;
 }
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78..32417b5064f 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev,
 	xdev = to_xenbus_device(dev);
 	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-	if (xdev == NULL)
-		return -ENODEV;
 
 	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
 		return -ENOMEM;
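xenbus_init() is restructured so the HVM case reads the xenstore coordinates from hypervisor parameters and maps the ring with ioremap(), while the dom0 local-xenstored setup moves into xenstored_local_init(). The HVM discovery step in isolation (kernel context assumed; a direct condensation of the hunk above, not new API):

```c
/* Sketch: how an HVM guest finds the xenstore ring, per the new
 * xenbus_init().  Kernel context assumed; not standalone. */
static int demo_hvm_store_discover(void)
{
	uint64_t v = 0;
	int err;

	err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
	if (err)
		return err;
	xen_store_evtchn = (int)v;

	err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
	if (err)
		return err;
	xen_store_mfn = (unsigned long)v;

	/* The ring is guest-physical memory, not directly mapped: */
	xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
				      PAGE_SIZE);
	return xen_store_interface ? 0 : -ENOMEM;
}
```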
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a56..540587e18a9 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
 }
 EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
 
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
 static int frontend_probe_and_watch(struct notifier_block *notifier,
 				   unsigned long event,
 				   void *data)
 {
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690075a..b3b8f2f3ad1 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include "xenbus_comms.h"
 
 struct xs_stored_msg {
@@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token)
 	return NULL;
 }
 
+static void xs_reset_watches(void)
+{
+	int err;
+
+	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+	if (err && err != -EEXIST)
+		printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+}
+
 /* Register callback to watch this node. */
 int register_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
 
 	err = xs_watch(watch->node, token);
 
-	/* Ignore errors due to multiple registration. */
-	if ((err != 0) && (err != -EEXIST)) {
+	if (err) {
 		spin_lock(&watches_lock);
 		list_del(&watch->list);
 		spin_unlock(&watches_lock);
@@ -897,5 +906,9 @@ int xs_init(void)
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
+	/* shutdown watches for kexec boot */
+	if (xen_hvm_domain())
+		xs_reset_watches();
+
 	return 0;
 }
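The frontend reset added above is a small state machine: each switch case writes the next frontend state and waits for the backend to follow, deliberately falling through until both sides are back at Initialising. A standalone model of that walk, using the numeric XenbusState values:

```c
#include <stdio.h>

/* Numeric values from xen/interface/io/xenbus.h */
enum xenbus_state {
	XenbusStateInitialising = 1,
	XenbusStateConnected = 4,
	XenbusStateClosing = 5,
	XenbusStateClosed = 6,
};

static void step(const char *s) { printf("  -> %s\n", s); }

static void reset_frontend(enum xenbus_state be_state)
{
	switch (be_state) {
	case XenbusStateConnected:
		step("Closing");	/* write state, wait for backend */
		/* fall through */
	case XenbusStateClosing:
		step("Closed");
		/* fall through */
	case XenbusStateClosed:
		step("Initialising");
		break;
	default:
		break;
	}
}

int main(void)
{
	printf("backend Connected (kdump case):\n");
	reset_frontend(XenbusStateConnected);
	printf("backend Closed (kexec case):\n");
	reset_frontend(XenbusStateClosed);
	return 0;
}
```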