diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
| -rw-r--r-- | virt/kvm/kvm_main.c | 968 | 
1 files changed, 150 insertions, 818 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b7c78a403dc..f92ba138007 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -43,6 +43,7 @@  #include <linux/swap.h>  #include <linux/bitops.h>  #include <linux/spinlock.h> +#include <linux/compat.h>  #include <asm/processor.h>  #include <asm/io.h> @@ -53,12 +54,6 @@  #include "coalesced_mmio.h"  #endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -#include <linux/pci.h> -#include <linux/interrupt.h> -#include "irq.h" -#endif -  #define CREATE_TRACE_POINTS  #include <trace/events/kvm.h> @@ -75,6 +70,8 @@ DEFINE_SPINLOCK(kvm_lock);  LIST_HEAD(vm_list);  static cpumask_var_t cpus_hardware_enabled; +static int kvm_usage_count = 0; +static atomic_t hardware_enable_failed;  struct kmem_cache *kvm_vcpu_cache;  EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -85,615 +82,13 @@ struct dentry *kvm_debugfs_dir;  static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,  			   unsigned long arg); +static int hardware_enable_all(void); +static void hardware_disable_all(void);  static bool kvm_rebooting;  static bool largepages_enabled = true; -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, -						      int assigned_dev_id) -{ -	struct list_head *ptr; -	struct kvm_assigned_dev_kernel *match; - -	list_for_each(ptr, head) { -		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); -		if (match->assigned_dev_id == assigned_dev_id) -			return match; -	} -	return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel -				    *assigned_dev, int irq) -{ -	int i, index; -	struct msix_entry *host_msix_entries; - -	host_msix_entries = assigned_dev->host_msix_entries; - -	index = -1; -	for (i = 0; i < assigned_dev->entries_nr; i++) -		if (irq == host_msix_entries[i].vector) { -			index = i; -			break; -		} -	if (index < 0) { -		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); -		return 0; -	} - -	return index; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ -	struct kvm_assigned_dev_kernel *assigned_dev; -	struct kvm *kvm; -	int i; - -	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, -				    interrupt_work); -	kvm = assigned_dev->kvm; - -	mutex_lock(&kvm->irq_lock); -	spin_lock_irq(&assigned_dev->assigned_dev_lock); -	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { -		struct kvm_guest_msix_entry *guest_entries = -			assigned_dev->guest_msix_entries; -		for (i = 0; i < assigned_dev->entries_nr; i++) { -			if (!(guest_entries[i].flags & -					KVM_ASSIGNED_MSIX_PENDING)) -				continue; -			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; -			kvm_set_irq(assigned_dev->kvm, -				    assigned_dev->irq_source_id, -				    guest_entries[i].vector, 1); -		} -	} else -		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, -			    assigned_dev->guest_irq, 1); - -	spin_unlock_irq(&assigned_dev->assigned_dev_lock); -	mutex_unlock(&assigned_dev->kvm->irq_lock); -} - -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ -	unsigned long flags; -	struct kvm_assigned_dev_kernel *assigned_dev = -		(struct kvm_assigned_dev_kernel *) dev_id; - -	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); -	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { -		int index = find_index_from_host_irq(assigned_dev, irq); -		if (index < 0) -			goto out; -		assigned_dev->guest_msix_entries[index].flags |= -			KVM_ASSIGNED_MSIX_PENDING; -	} - -	schedule_work(&assigned_dev->interrupt_work); - -	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { -		disable_irq_nosync(irq); -		assigned_dev->host_irq_disabled = true; -	} - -out: -	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); -	return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ -	struct kvm_assigned_dev_kernel *dev; -	unsigned long flags; - -	if (kian->gsi == -1) -		return; - -	dev = container_of(kian, struct kvm_assigned_dev_kernel, -			   ack_notifier); - -	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - -	/* The guest irq may be shared so this ack may be -	 * from another device. -	 */ -	spin_lock_irqsave(&dev->assigned_dev_lock, flags); -	if (dev->host_irq_disabled) { -		enable_irq(dev->host_irq); -		dev->host_irq_disabled = false; -	} -	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); -} - -static void deassign_guest_irq(struct kvm *kvm, -			       struct kvm_assigned_dev_kernel *assigned_dev) -{ -	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); -	assigned_dev->ack_notifier.gsi = -1; - -	if (assigned_dev->irq_source_id != -1) -		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); -	assigned_dev->irq_source_id = -1; -	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, -			      struct kvm_assigned_dev_kernel *assigned_dev) -{ -	/* -	 * In kvm_free_device_irq, cancel_work_sync return true if: -	 * 1. work is scheduled, and then cancelled. -	 * 2. work callback is executed. -	 * -	 * The first one ensured that the irq is disabled and no more events -	 * would happen. But for the second one, the irq may be enabled (e.g. -	 * for MSI). So we disable irq here to prevent further events. -	 * -	 * Notice this maybe result in nested disable if the interrupt type is -	 * INTx, but it's OK for we are going to free it. -	 * -	 * If this function is a part of VM destroy, please ensure that till -	 * now, the kvm state is still legal for probably we also have to wait -	 * interrupt_work done. -	 */ -	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { -		int i; -		for (i = 0; i < assigned_dev->entries_nr; i++) -			disable_irq_nosync(assigned_dev-> -					   host_msix_entries[i].vector); - -		cancel_work_sync(&assigned_dev->interrupt_work); - -		for (i = 0; i < assigned_dev->entries_nr; i++) -			free_irq(assigned_dev->host_msix_entries[i].vector, -				 (void *)assigned_dev); - -		assigned_dev->entries_nr = 0; -		kfree(assigned_dev->host_msix_entries); -		kfree(assigned_dev->guest_msix_entries); -		pci_disable_msix(assigned_dev->dev); -	} else { -		/* Deal with MSI and INTx */ -		disable_irq_nosync(assigned_dev->host_irq); -		cancel_work_sync(&assigned_dev->interrupt_work); - -		free_irq(assigned_dev->host_irq, (void *)assigned_dev); - -		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) -			pci_disable_msi(assigned_dev->dev); -	} - -	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, -			    struct kvm_assigned_dev_kernel *assigned_dev, -			    unsigned long irq_requested_type) -{ -	unsigned long guest_irq_type, host_irq_type; - -	if (!irqchip_in_kernel(kvm)) -		return -EINVAL; -	/* no irq assignment to deassign */ -	if (!assigned_dev->irq_requested_type) -		return -ENXIO; - -	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; -	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - -	if (host_irq_type) -		deassign_host_irq(kvm, assigned_dev); -	if (guest_irq_type) -		deassign_guest_irq(kvm, assigned_dev); - -	return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, -				  struct kvm_assigned_dev_kernel *assigned_dev) -{ -	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, -				     struct kvm_assigned_dev_kernel -				     *assigned_dev) -{ -	kvm_free_assigned_irq(kvm, assigned_dev); - -	pci_reset_function(assigned_dev->dev); - -	pci_release_regions(assigned_dev->dev); -	pci_disable_device(assigned_dev->dev); -	pci_dev_put(assigned_dev->dev); - -	list_del(&assigned_dev->list); -	kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ -	struct list_head *ptr, *ptr2; -	struct kvm_assigned_dev_kernel *assigned_dev; - -	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { -		assigned_dev = list_entry(ptr, -					  struct kvm_assigned_dev_kernel, -					  list); - -		kvm_free_assigned_device(kvm, assigned_dev); -	} -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, -					    struct kvm_assigned_dev_kernel *dev) -{ -	dev->host_irq = dev->dev->irq; -	/* Even though this is PCI, we don't want to use shared -	 * interrupts. Sharing host devices with guest-assigned devices -	 * on the same interrupt line is not a happy situation: there -	 * are going to be long delays in accepting, acking, etc. -	 */ -	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, -			0, "kvm_assigned_intx_device", (void *)dev)) -		return -EIO; -	return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_host_msi(struct kvm *kvm, -					   struct kvm_assigned_dev_kernel *dev) -{ -	int r; - -	if (!dev->dev->msi_enabled) { -		r = pci_enable_msi(dev->dev); -		if (r) -			return r; -	} - -	dev->host_irq = dev->dev->irq; -	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, -			"kvm_assigned_msi_device", (void *)dev)) { -		pci_disable_msi(dev->dev); -		return -EIO; -	} - -	return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_host_msix(struct kvm *kvm, -					    struct kvm_assigned_dev_kernel *dev) -{ -	int i, r = -EINVAL; - -	/* host_msix_entries and guest_msix_entries should have been -	 * initialized */ -	if (dev->entries_nr == 0) -		return r; - -	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); -	if (r) -		return r; - -	for (i = 0; i < dev->entries_nr; i++) { -		r = request_irq(dev->host_msix_entries[i].vector, -				kvm_assigned_dev_intr, 0, -				"kvm_assigned_msix_device", -				(void *)dev); -		/* FIXME: free requested_irq's on failure */ -		if (r) -			return r; -	} - -	return 0; -} - -#endif - -static int assigned_device_enable_guest_intx(struct kvm *kvm, -				struct kvm_assigned_dev_kernel *dev, -				struct kvm_assigned_irq *irq) -{ -	dev->guest_irq = irq->guest_irq; -	dev->ack_notifier.gsi = irq->guest_irq; -	return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_guest_msi(struct kvm *kvm, -			struct kvm_assigned_dev_kernel *dev, -			struct kvm_assigned_irq *irq) -{ -	dev->guest_irq = irq->guest_irq; -	dev->ack_notifier.gsi = -1; -	dev->host_irq_disabled = false; -	return 0; -} -#endif -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_guest_msix(struct kvm *kvm, -			struct kvm_assigned_dev_kernel *dev, -			struct kvm_assigned_irq *irq) -{ -	dev->guest_irq = irq->guest_irq; -	dev->ack_notifier.gsi = -1; -	dev->host_irq_disabled = false; -	return 0; -} -#endif - -static int assign_host_irq(struct kvm *kvm, -			   struct kvm_assigned_dev_kernel *dev, -			   __u32 host_irq_type) -{ -	int r = -EEXIST; - -	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) -		return r; - -	switch (host_irq_type) { -	case KVM_DEV_IRQ_HOST_INTX: -		r = assigned_device_enable_host_intx(kvm, dev); -		break; -#ifdef __KVM_HAVE_MSI -	case KVM_DEV_IRQ_HOST_MSI: -		r = assigned_device_enable_host_msi(kvm, dev); -		break; -#endif -#ifdef __KVM_HAVE_MSIX -	case KVM_DEV_IRQ_HOST_MSIX: -		r = assigned_device_enable_host_msix(kvm, dev); -		break; -#endif -	default: -		r = -EINVAL; -	} - -	if (!r) -		dev->irq_requested_type |= host_irq_type; - -	return r; -} - -static int assign_guest_irq(struct kvm *kvm, -			    struct kvm_assigned_dev_kernel *dev, -			    struct kvm_assigned_irq *irq, -			    unsigned long guest_irq_type) -{ -	int id; -	int r = -EEXIST; - -	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) -		return r; - -	id = kvm_request_irq_source_id(kvm); -	if (id < 0) -		return id; - -	dev->irq_source_id = id; - -	switch (guest_irq_type) { -	case KVM_DEV_IRQ_GUEST_INTX: -		r = assigned_device_enable_guest_intx(kvm, dev, irq); -		break; -#ifdef __KVM_HAVE_MSI -	case KVM_DEV_IRQ_GUEST_MSI: -		r = assigned_device_enable_guest_msi(kvm, dev, irq); -		break; -#endif -#ifdef __KVM_HAVE_MSIX -	case KVM_DEV_IRQ_GUEST_MSIX: -		r = assigned_device_enable_guest_msix(kvm, dev, irq); -		break; -#endif -	default: -		r = -EINVAL; -	} - -	if (!r) { -		dev->irq_requested_type |= guest_irq_type; -		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); -	} else -		kvm_free_irq_source_id(kvm, dev->irq_source_id); - -	return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, -				   struct kvm_assigned_irq *assigned_irq) -{ -	int r = -EINVAL; -	struct kvm_assigned_dev_kernel *match; -	unsigned long host_irq_type, guest_irq_type; - -	if (!capable(CAP_SYS_RAWIO)) -		return -EPERM; - -	if (!irqchip_in_kernel(kvm)) -		return r; - -	mutex_lock(&kvm->lock); -	r = -ENODEV; -	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      assigned_irq->assigned_dev_id); -	if (!match) -		goto out; - -	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); -	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - -	r = -EINVAL; -	/* can only assign one type at a time */ -	if (hweight_long(host_irq_type) > 1) -		goto out; -	if (hweight_long(guest_irq_type) > 1) -		goto out; -	if (host_irq_type == 0 && guest_irq_type == 0) -		goto out; - -	r = 0; -	if (host_irq_type) -		r = assign_host_irq(kvm, match, host_irq_type); -	if (r) -		goto out; - -	if (guest_irq_type) -		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); -out: -	mutex_unlock(&kvm->lock); -	return r; -} - -static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, -					 struct kvm_assigned_irq -					 *assigned_irq) -{ -	int r = -ENODEV; -	struct kvm_assigned_dev_kernel *match; - -	mutex_lock(&kvm->lock); - -	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      assigned_irq->assigned_dev_id); -	if (!match) -		goto out; - -	r = kvm_deassign_irq(kvm, match, assigned_irq->flags); -out: -	mutex_unlock(&kvm->lock); -	return r; -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, -				      struct kvm_assigned_pci_dev *assigned_dev) -{ -	int r = 0; -	struct kvm_assigned_dev_kernel *match; -	struct pci_dev *dev; - -	down_read(&kvm->slots_lock); -	mutex_lock(&kvm->lock); - -	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      assigned_dev->assigned_dev_id); -	if (match) { -		/* device already assigned */ -		r = -EEXIST; -		goto out; -	} - -	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); -	if (match == NULL) { -		printk(KERN_INFO "%s: Couldn't allocate memory\n", -		       __func__); -		r = -ENOMEM; -		goto out; -	} -	dev = pci_get_bus_and_slot(assigned_dev->busnr, -				   assigned_dev->devfn); -	if (!dev) { -		printk(KERN_INFO "%s: host device not found\n", __func__); -		r = -EINVAL; -		goto out_free; -	} -	if (pci_enable_device(dev)) { -		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); -		r = -EBUSY; -		goto out_put; -	} -	r = pci_request_regions(dev, "kvm_assigned_device"); -	if (r) { -		printk(KERN_INFO "%s: Could not get access to device regions\n", -		       __func__); -		goto out_disable; -	} - -	pci_reset_function(dev); - -	match->assigned_dev_id = assigned_dev->assigned_dev_id; -	match->host_busnr = assigned_dev->busnr; -	match->host_devfn = assigned_dev->devfn; -	match->flags = assigned_dev->flags; -	match->dev = dev; -	spin_lock_init(&match->assigned_dev_lock); -	match->irq_source_id = -1; -	match->kvm = kvm; -	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; -	INIT_WORK(&match->interrupt_work, -		  kvm_assigned_dev_interrupt_work_handler); - -	list_add(&match->list, &kvm->arch.assigned_dev_head); - -	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { -		if (!kvm->arch.iommu_domain) { -			r = kvm_iommu_map_guest(kvm); -			if (r) -				goto out_list_del; -		} -		r = kvm_assign_device(kvm, match); -		if (r) -			goto out_list_del; -	} - -out: -	mutex_unlock(&kvm->lock); -	up_read(&kvm->slots_lock); -	return r; -out_list_del: -	list_del(&match->list); -	pci_release_regions(dev); -out_disable: -	pci_disable_device(dev); -out_put: -	pci_dev_put(dev); -out_free: -	kfree(match); -	mutex_unlock(&kvm->lock); -	up_read(&kvm->slots_lock); -	return r; -} -#endif - -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT -static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, -		struct kvm_assigned_pci_dev *assigned_dev) -{ -	int r = 0; -	struct kvm_assigned_dev_kernel *match; - -	mutex_lock(&kvm->lock); - -	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      assigned_dev->assigned_dev_id); -	if (!match) { -		printk(KERN_INFO "%s: device hasn't been assigned before, " -		  "so cannot be deassigned\n", __func__); -		r = -EINVAL; -		goto out; -	} - -	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) -		kvm_deassign_device(kvm, match); - -	kvm_free_assigned_device(kvm, match); - -out: -	mutex_unlock(&kvm->lock); -	return r; -} -#endif -  inline int kvm_is_mmio_pfn(pfn_t pfn)  {  	if (pfn_valid(pfn)) { @@ -949,6 +344,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {  static struct kvm *kvm_create_vm(void)  { +	int r = 0;  	struct kvm *kvm = kvm_arch_create_vm();  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET  	struct page *page; @@ -956,16 +352,21 @@ static struct kvm *kvm_create_vm(void)  	if (IS_ERR(kvm))  		goto out; + +	r = hardware_enable_all(); +	if (r) +		goto out_err_nodisable; +  #ifdef CONFIG_HAVE_KVM_IRQCHIP -	INIT_LIST_HEAD(&kvm->irq_routing);  	INIT_HLIST_HEAD(&kvm->mask_notifier_list); +	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);  #endif  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET  	page = alloc_page(GFP_KERNEL | __GFP_ZERO);  	if (!page) { -		kfree(kvm); -		return ERR_PTR(-ENOMEM); +		r = -ENOMEM; +		goto out_err;  	}  	kvm->coalesced_mmio_ring =  			(struct kvm_coalesced_mmio_ring *)page_address(page); @@ -973,15 +374,13 @@ static struct kvm *kvm_create_vm(void)  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)  	{ -		int err;  		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; -		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); -		if (err) { +		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); +		if (r) {  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET  			put_page(page);  #endif -			kfree(kvm); -			return ERR_PTR(err); +			goto out_err;  		}  	}  #endif @@ -1005,6 +404,12 @@ static struct kvm *kvm_create_vm(void)  #endif  out:  	return kvm; + +out_err: +	hardware_disable_all(); +out_err_nodisable: +	kfree(kvm); +	return ERR_PTR(r);  }  /* @@ -1063,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *kvm)  	kvm_arch_flush_shadow(kvm);  #endif  	kvm_arch_destroy_vm(kvm); +	hardware_disable_all();  	mmdrop(mm);  } @@ -1689,9 +1095,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)  		if (signal_pending(current))  			break; -		vcpu_put(vcpu);  		schedule(); -		vcpu_load(vcpu);  	}  	finish_wait(&vcpu->wq, &wait); @@ -1705,6 +1109,21 @@ void kvm_resched(struct kvm_vcpu *vcpu)  }  EXPORT_SYMBOL_GPL(kvm_resched); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) +{ +	ktime_t expires; +	DEFINE_WAIT(wait); + +	prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); + +	/* Sleep for 100 us, and hope lock-holder got scheduled */ +	expires = ktime_add_ns(ktime_get(), 100000UL); +	schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + +	finish_wait(&vcpu->wq, &wait); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); +  static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  {  	struct kvm_vcpu *vcpu = vma->vm_file->private_data; @@ -1828,88 +1247,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)  	return 0;  } -#ifdef __KVM_HAVE_MSIX -static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, -				    struct kvm_assigned_msix_nr *entry_nr) -{ -	int r = 0; -	struct kvm_assigned_dev_kernel *adev; - -	mutex_lock(&kvm->lock); - -	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      entry_nr->assigned_dev_id); -	if (!adev) { -		r = -EINVAL; -		goto msix_nr_out; -	} - -	if (adev->entries_nr == 0) { -		adev->entries_nr = entry_nr->entry_nr; -		if (adev->entries_nr == 0 || -		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { -			r = -EINVAL; -			goto msix_nr_out; -		} - -		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * -						entry_nr->entry_nr, -						GFP_KERNEL); -		if (!adev->host_msix_entries) { -			r = -ENOMEM; -			goto msix_nr_out; -		} -		adev->guest_msix_entries = kzalloc( -				sizeof(struct kvm_guest_msix_entry) * -				entry_nr->entry_nr, GFP_KERNEL); -		if (!adev->guest_msix_entries) { -			kfree(adev->host_msix_entries); -			r = -ENOMEM; -			goto msix_nr_out; -		} -	} else /* Not allowed set MSI-X number twice */ -		r = -EINVAL; -msix_nr_out: -	mutex_unlock(&kvm->lock); -	return r; -} - -static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, -				       struct kvm_assigned_msix_entry *entry) -{ -	int r = 0, i; -	struct kvm_assigned_dev_kernel *adev; - -	mutex_lock(&kvm->lock); - -	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, -				      entry->assigned_dev_id); - -	if (!adev) { -		r = -EINVAL; -		goto msix_entry_out; -	} - -	for (i = 0; i < adev->entries_nr; i++) -		if (adev->guest_msix_entries[i].vector == 0 || -		    adev->guest_msix_entries[i].entry == entry->entry) { -			adev->guest_msix_entries[i].entry = entry->entry; -			adev->guest_msix_entries[i].vector = entry->gsi; -			adev->host_msix_entries[i].entry = entry->entry; -			break; -		} -	if (i == adev->entries_nr) { -		r = -ENOSPC; -		goto msix_entry_out; -	} - -msix_entry_out: -	mutex_unlock(&kvm->lock); - -	return r; -} -#endif -  static long kvm_vcpu_ioctl(struct file *filp,  			   unsigned int ioctl, unsigned long arg)  { @@ -2168,112 +1505,6 @@ static long kvm_vm_ioctl(struct file *filp,  		break;  	}  #endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -	case KVM_ASSIGN_PCI_DEVICE: { -		struct kvm_assigned_pci_dev assigned_dev; - -		r = -EFAULT; -		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) -			goto out; -		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); -		if (r) -			goto out; -		break; -	} -	case KVM_ASSIGN_IRQ: { -		r = -EOPNOTSUPP; -		break; -	} -#ifdef KVM_CAP_ASSIGN_DEV_IRQ -	case KVM_ASSIGN_DEV_IRQ: { -		struct kvm_assigned_irq assigned_irq; - -		r = -EFAULT; -		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) -			goto out; -		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); -		if (r) -			goto out; -		break; -	} -	case KVM_DEASSIGN_DEV_IRQ: { -		struct kvm_assigned_irq assigned_irq; - -		r = -EFAULT; -		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) -			goto out; -		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); -		if (r) -			goto out; -		break; -	} -#endif -#endif -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT -	case KVM_DEASSIGN_PCI_DEVICE: { -		struct kvm_assigned_pci_dev assigned_dev; - -		r = -EFAULT; -		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) -			goto out; -		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); -		if (r) -			goto out; -		break; -	} -#endif -#ifdef KVM_CAP_IRQ_ROUTING -	case KVM_SET_GSI_ROUTING: { -		struct kvm_irq_routing routing; -		struct kvm_irq_routing __user *urouting; -		struct kvm_irq_routing_entry *entries; - -		r = -EFAULT; -		if (copy_from_user(&routing, argp, sizeof(routing))) -			goto out; -		r = -EINVAL; -		if (routing.nr >= KVM_MAX_IRQ_ROUTES) -			goto out; -		if (routing.flags) -			goto out; -		r = -ENOMEM; -		entries = vmalloc(routing.nr * sizeof(*entries)); -		if (!entries) -			goto out; -		r = -EFAULT; -		urouting = argp; -		if (copy_from_user(entries, urouting->entries, -				   routing.nr * sizeof(*entries))) -			goto out_free_irq_routing; -		r = kvm_set_irq_routing(kvm, entries, routing.nr, -					routing.flags); -	out_free_irq_routing: -		vfree(entries); -		break; -	} -#endif /* KVM_CAP_IRQ_ROUTING */ -#ifdef __KVM_HAVE_MSIX -	case KVM_ASSIGN_SET_MSIX_NR: { -		struct kvm_assigned_msix_nr entry_nr; -		r = -EFAULT; -		if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) -			goto out; -		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); -		if (r) -			goto out; -		break; -	} -	case KVM_ASSIGN_SET_MSIX_ENTRY: { -		struct kvm_assigned_msix_entry entry; -		r = -EFAULT; -		if (copy_from_user(&entry, argp, sizeof entry)) -			goto out; -		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); -		if (r) -			goto out; -		break; -	} -#endif  	case KVM_IRQFD: {  		struct kvm_irqfd data; @@ -2305,11 +1536,59 @@ static long kvm_vm_ioctl(struct file *filp,  #endif  	default:  		r = kvm_arch_vm_ioctl(filp, ioctl, arg); +		if (r == -ENOTTY) +			r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);  	}  out:  	return r;  } +#ifdef CONFIG_COMPAT +struct compat_kvm_dirty_log { +	__u32 slot; +	__u32 padding1; +	union { +		compat_uptr_t dirty_bitmap; /* one bit per page */ +		__u64 padding2; +	}; +}; + +static long kvm_vm_compat_ioctl(struct file *filp, +			   unsigned int ioctl, unsigned long arg) +{ +	struct kvm *kvm = filp->private_data; +	int r; + +	if (kvm->mm != current->mm) +		return -EIO; +	switch (ioctl) { +	case KVM_GET_DIRTY_LOG: { +		struct compat_kvm_dirty_log compat_log; +		struct kvm_dirty_log log; + +		r = -EFAULT; +		if (copy_from_user(&compat_log, (void __user *)arg, +				   sizeof(compat_log))) +			goto out; +		log.slot	 = compat_log.slot; +		log.padding1	 = compat_log.padding1; +		log.padding2	 = compat_log.padding2; +		log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); + +		r = kvm_vm_ioctl_get_dirty_log(kvm, &log); +		if (r) +			goto out; +		break; +	} +	default: +		r = kvm_vm_ioctl(filp, ioctl, arg); +	} + +out: +	return r; +} +#endif +  static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  {  	struct page *page[1]; @@ -2344,7 +1623,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)  static struct file_operations kvm_vm_fops = {  	.release        = kvm_vm_release,  	.unlocked_ioctl = kvm_vm_ioctl, -	.compat_ioctl   = kvm_vm_ioctl, +#ifdef CONFIG_COMPAT +	.compat_ioctl   = kvm_vm_compat_ioctl, +#endif  	.mmap           = kvm_vm_mmap,  }; @@ -2372,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)  #ifdef CONFIG_KVM_APIC_ARCHITECTURE  	case KVM_CAP_SET_BOOT_CPU_ID:  #endif +	case KVM_CAP_INTERNAL_ERROR_DATA:  		return 1;  #ifdef CONFIG_HAVE_KVM_IRQCHIP  	case KVM_CAP_IRQ_ROUTING: @@ -2442,11 +1724,21 @@ static struct miscdevice kvm_dev = {  static void hardware_enable(void *junk)  {  	int cpu = raw_smp_processor_id(); +	int r;  	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))  		return; +  	cpumask_set_cpu(cpu, cpus_hardware_enabled); -	kvm_arch_hardware_enable(NULL); + +	r = kvm_arch_hardware_enable(NULL); + +	if (r) { +		cpumask_clear_cpu(cpu, cpus_hardware_enabled); +		atomic_inc(&hardware_enable_failed); +		printk(KERN_INFO "kvm: enabling virtualization on " +				 "CPU%d failed\n", cpu); +	}  }  static void hardware_disable(void *junk) @@ -2459,11 +1751,52 @@ static void hardware_disable(void *junk)  	kvm_arch_hardware_disable(NULL);  } +static void hardware_disable_all_nolock(void) +{ +	BUG_ON(!kvm_usage_count); + +	kvm_usage_count--; +	if (!kvm_usage_count) +		on_each_cpu(hardware_disable, NULL, 1); +} + +static void hardware_disable_all(void) +{ +	spin_lock(&kvm_lock); +	hardware_disable_all_nolock(); +	spin_unlock(&kvm_lock); +} + +static int hardware_enable_all(void) +{ +	int r = 0; + +	spin_lock(&kvm_lock); + +	kvm_usage_count++; +	if (kvm_usage_count == 1) { +		atomic_set(&hardware_enable_failed, 0); +		on_each_cpu(hardware_enable, NULL, 1); + +		if (atomic_read(&hardware_enable_failed)) { +			hardware_disable_all_nolock(); +			r = -EBUSY; +		} +	} + +	spin_unlock(&kvm_lock); + +	return r; +} +  static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,  			   void *v)  {  	int cpu = (long)v; +	if (!kvm_usage_count) +		return NOTIFY_OK; +  	val &= ~CPU_TASKS_FROZEN;  	switch (val) {  	case CPU_DYING: @@ -2666,13 +1999,15 @@ static void kvm_exit_debug(void)  static int kvm_suspend(struct sys_device *dev, pm_message_t state)  { -	hardware_disable(NULL); +	if (kvm_usage_count) +		hardware_disable(NULL);  	return 0;  }  static int kvm_resume(struct sys_device *dev)  { -	hardware_enable(NULL); +	if (kvm_usage_count) +		hardware_enable(NULL);  	return 0;  } @@ -2717,8 +2052,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,  	int r;  	int cpu; -	kvm_init_debug(); -  	r = kvm_arch_init(opaque);  	if (r)  		goto out_fail; @@ -2749,7 +2082,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,  			goto out_free_1;  	} -	on_each_cpu(hardware_enable, NULL, 1);  	r = register_cpu_notifier(&kvm_cpu_notifier);  	if (r)  		goto out_free_2; @@ -2785,6 +2117,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,  	kvm_preempt_ops.sched_in = kvm_sched_in;  	kvm_preempt_ops.sched_out = kvm_sched_out; +	kvm_init_debug(); +  	return 0;  out_free: @@ -2797,7 +2131,6 @@ out_free_3:  	unregister_reboot_notifier(&kvm_reboot_notifier);  	unregister_cpu_notifier(&kvm_cpu_notifier);  out_free_2: -	on_each_cpu(hardware_disable, NULL, 1);  out_free_1:  	kvm_arch_hardware_unsetup();  out_free_0a: @@ -2807,7 +2140,6 @@ out_free_0:  out:  	kvm_arch_exit();  out_fail: -	kvm_exit_debug();  	return r;  }  EXPORT_SYMBOL_GPL(kvm_init); @@ -2815,6 +2147,7 @@ EXPORT_SYMBOL_GPL(kvm_init);  void kvm_exit(void)  {  	tracepoint_synchronize_unregister(); +	kvm_exit_debug();  	misc_deregister(&kvm_dev);  	kmem_cache_destroy(kvm_vcpu_cache);  	sysdev_unregister(&kvm_sysdev); @@ -2824,7 +2157,6 @@ void kvm_exit(void)  	on_each_cpu(hardware_disable, NULL, 1);  	kvm_arch_hardware_unsetup();  	kvm_arch_exit(); -	kvm_exit_debug();  	free_cpumask_var(cpus_hardware_enabled);  	__free_page(bad_page);  }  |