diff options
Diffstat (limited to 'arch/x86/kernel/amd_iommu.c')
| -rw-r--r-- | arch/x86/kernel/amd_iommu.c | 1245 | 
1 files changed, 669 insertions, 576 deletions
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 98f230f6a28..32fb09102a1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@  /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.   * Author: Joerg Roedel <joerg.roedel@amd.com>   *         Leo Duran <leo.duran@amd.com>   * @@ -28,6 +28,7 @@  #include <asm/proto.h>  #include <asm/iommu.h>  #include <asm/gart.h> +#include <asm/amd_iommu_proto.h>  #include <asm/amd_iommu_types.h>  #include <asm/amd_iommu.h> @@ -56,20 +57,115 @@ struct iommu_cmd {  	u32 data[4];  }; -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, -			     struct unity_map_entry *e); -static struct dma_ops_domain *find_protection_domain(u16 devid); -static u64 *alloc_pte(struct protection_domain *domain, -		      unsigned long address, int end_lvl, -		      u64 **pte_page, gfp_t gfp); -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, -				      unsigned long start_page, -				      unsigned int pages);  static void reset_iommu_command_buffer(struct amd_iommu *iommu); -static u64 *fetch_pte(struct protection_domain *domain, -		      unsigned long address, int map_size);  static void update_domain(struct protection_domain *domain); +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ +	struct pci_dev *pdev = to_pci_dev(dev); + +	return calc_devid(pdev->bus->number, pdev->devfn); +} + +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ +	return dev->archdata.iommu; +} + +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ +	struct dma_ops_domain 
*entry, *ret = NULL; +	unsigned long flags; +	u16 alias = amd_iommu_alias_table[devid]; + +	if (list_empty(&iommu_pd_list)) +		return NULL; + +	spin_lock_irqsave(&iommu_pd_list_lock, flags); + +	list_for_each_entry(entry, &iommu_pd_list, list) { +		if (entry->target_dev == devid || +		    entry->target_dev == alias) { +			ret = entry; +			break; +		} +	} + +	spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + +	return ret; +} + +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ +	u16 devid; + +	if (!dev || !dev->dma_mask) +		return false; + +	/* No device or no PCI device */ +	if (!dev || dev->bus != &pci_bus_type) +		return false; + +	devid = get_device_id(dev); + +	/* Out of our scope? */ +	if (devid > amd_iommu_last_bdf) +		return false; + +	if (amd_iommu_rlookup_table[devid] == NULL) +		return false; + +	return true; +} + +static int iommu_init_device(struct device *dev) +{ +	struct iommu_dev_data *dev_data; +	struct pci_dev *pdev; +	u16 devid, alias; + +	if (dev->archdata.iommu) +		return 0; + +	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); +	if (!dev_data) +		return -ENOMEM; + +	dev_data->dev = dev; + +	devid = get_device_id(dev); +	alias = amd_iommu_alias_table[devid]; +	pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); +	if (pdev) +		dev_data->alias = &pdev->dev; + +	atomic_set(&dev_data->bind, 0); + +	dev->archdata.iommu = dev_data; + + +	return 0; +} + +static void iommu_uninit_device(struct device *dev) +{ +	kfree(dev->archdata.iommu); +}  #ifdef CONFIG_AMD_IOMMU_STATS  /* @@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem);  DECLARE_STATS_COUNTER(total_map_requests);  static struct dentry *stats_dir; -static struct dentry *de_isolate;  static struct dentry *de_fflush;  static void amd_iommu_stats_add(struct __iommu_counter *cnt) @@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void)  	if (stats_dir == 
NULL)  		return; -	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, -					 (u32 *)&amd_iommu_isolate); -  	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,  					 (u32 *)&amd_iommu_unmap_flush); @@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void)  #endif -/* returns !0 if the IOMMU is caching non-present entries in its TLB */ -static int iommu_has_npcache(struct amd_iommu *iommu) -{ -	return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); -} -  /****************************************************************************   *   * Interrupt handling functions @@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)  		break;  	case EVENT_TYPE_ILL_CMD:  		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); +		iommu->reset_in_progress = true;  		reset_iommu_command_buffer(iommu);  		dump_command(address);  		break; @@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)  	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;  	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); -	if (unlikely(i == EXIT_LOOP_COUNT)) { -		spin_unlock(&iommu->lock); -		reset_iommu_command_buffer(iommu); -		spin_lock(&iommu->lock); -	} +	if (unlikely(i == EXIT_LOOP_COUNT)) +		iommu->reset_in_progress = true;  }  /* @@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu)  out:  	spin_unlock_irqrestore(&iommu->lock, flags); +	if (iommu->reset_in_progress) +		reset_iommu_command_buffer(iommu); +  	return 0;  } +static void iommu_flush_complete(struct protection_domain *domain) +{ +	int i; + +	for (i = 0; i < amd_iommus_present; ++i) { +		if (!domain->dev_iommu[i]) +			continue; + +		/* +		 * Devices of this domain are behind this IOMMU +		 * We need to wait for completion of all commands. 
+		 */ +		iommu_completion_wait(amd_iommus[i]); +	} +} +  /*   * Command send function for invalidating a device table entry   */ -static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) +static int iommu_flush_device(struct device *dev)  { +	struct amd_iommu *iommu;  	struct iommu_cmd cmd; -	int ret; +	u16 devid; -	BUG_ON(iommu == NULL); +	devid = get_device_id(dev); +	iommu = amd_iommu_rlookup_table[devid]; +	/* Build command */  	memset(&cmd, 0, sizeof(cmd));  	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);  	cmd.data[0] = devid; -	ret = iommu_queue_command(iommu, &cmd); - -	return ret; +	return iommu_queue_command(iommu, &cmd);  }  static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, @@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,   * It invalidates a single PTE if the range to flush is within a single   * page. Otherwise it flushes the whole TLB of the IOMMU.   */ -static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, -		u64 address, size_t size) +static void __iommu_flush_pages(struct protection_domain *domain, +				u64 address, size_t size, int pde)  { -	int s = 0; -	unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); +	int s = 0, i; +	unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);  	address &= PAGE_MASK; @@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,  		s = 1;  	} -	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); -	return 0; +	for (i = 0; i < amd_iommus_present; ++i) { +		if (!domain->dev_iommu[i]) +			continue; + +		/* +		 * Devices of this domain are behind this IOMMU +		 * We need a TLB flush +		 */ +		iommu_queue_inv_iommu_pages(amd_iommus[i], address, +					    domain->id, pde, s); +	} + +	return;  } -/* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_pages(struct protection_domain *domain, +			    
 u64 address, size_t size)  { -	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - -	INC_STATS_COUNTER(domain_flush_single); +	__iommu_flush_pages(domain, address, size, 0); +} -	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct protection_domain *domain) +{ +	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);  }  /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb_pde(struct protection_domain *domain)  { -       u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - -       INC_STATS_COUNTER(domain_flush_single); - -       iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); +	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);  } +  /* - * This function flushes one domain on one IOMMU + * This function flushes the DTEs for all devices in domain   */ -static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_domain_devices(struct protection_domain *domain)  { -	struct iommu_cmd cmd; +	struct iommu_dev_data *dev_data;  	unsigned long flags; -	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, -				      domid, 1, 1); +	spin_lock_irqsave(&domain->lock, flags); -	spin_lock_irqsave(&iommu->lock, flags); -	__iommu_queue_command(iommu, &cmd); -	__iommu_completion_wait(iommu); -	__iommu_wait_for_completion(iommu); -	spin_unlock_irqrestore(&iommu->lock, flags); +	list_for_each_entry(dev_data, &domain->dev_list, list) +		iommu_flush_device(dev_data->dev); + +	spin_unlock_irqrestore(&domain->lock, flags);  } -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +static void iommu_flush_all_domain_devices(void)  { -	int i; +	struct protection_domain *domain; +	unsigned long flags; -	for (i = 1; i < MAX_DOMAIN_ID; ++i) { -		if (!test_bit(i, 
amd_iommu_pd_alloc_bitmap)) -			continue; -		flush_domain_on_iommu(iommu, i); +	spin_lock_irqsave(&amd_iommu_pd_lock, flags); + +	list_for_each_entry(domain, &amd_iommu_pd_list, list) { +		iommu_flush_domain_devices(domain); +		iommu_flush_complete(domain);  	} +	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +void amd_iommu_flush_all_devices(void) +{ +	iommu_flush_all_domain_devices(); }  /* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume.   */ -static void iommu_flush_domain(u16 domid) +void amd_iommu_flush_all_domains(void)  { -	struct amd_iommu *iommu; +	struct protection_domain *domain; +	unsigned long flags; -	INC_STATS_COUNTER(domain_flush_all); +	spin_lock_irqsave(&amd_iommu_pd_lock, flags); -	for_each_iommu(iommu) -		flush_domain_on_iommu(iommu, domid); +	list_for_each_entry(domain, &amd_iommu_pd_list, list) { +		spin_lock(&domain->lock); +		iommu_flush_tlb_pde(domain); +		iommu_flush_complete(domain); +		spin_unlock(&domain->lock); +	} + +	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);  } -void amd_iommu_flush_all_domains(void) +static void reset_iommu_command_buffer(struct amd_iommu *iommu)  { -	struct amd_iommu *iommu; +	pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); -	for_each_iommu(iommu) -		flush_all_domains_on_iommu(iommu); +	if (iommu->reset_in_progress) +		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + +	amd_iommu_reset_cmd_buffer(iommu); +	amd_iommu_flush_all_devices(); +	amd_iommu_flush_all_domains(); + +	iommu->reset_in_progress = false;  } -static void flush_all_devices_for_iommu(struct amd_iommu *iommu) +/**************************************************************************** + * + * The functions below are used to create the page table mappings for + * unity mapped regions. 
+ * + ****************************************************************************/ + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, +				   gfp_t gfp)  { -	int i; +	u64 *pte; -	for (i = 0; i <= amd_iommu_last_bdf; ++i) { -		if (iommu != amd_iommu_rlookup_table[i]) -			continue; +	if (domain->mode == PAGE_MODE_6_LEVEL) +		/* address space already 64 bit large */ +		return false; -		iommu_queue_inv_dev_entry(iommu, i); -		iommu_completion_wait(iommu); -	} +	pte = (void *)get_zeroed_page(gfp); +	if (!pte) +		return false; + +	*pte             = PM_LEVEL_PDE(domain->mode, +					virt_to_phys(domain->pt_root)); +	domain->pt_root  = pte; +	domain->mode    += 1; +	domain->updated  = true; + +	return true;  } -static void flush_devices_by_domain(struct protection_domain *domain) +static u64 *alloc_pte(struct protection_domain *domain, +		      unsigned long address, +		      int end_lvl, +		      u64 **pte_page, +		      gfp_t gfp)  { -	struct amd_iommu *iommu; -	int i; +	u64 *pte, *page; +	int level; -	for (i = 0; i <= amd_iommu_last_bdf; ++i) { -		if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || -		    (amd_iommu_pd_table[i] != domain)) -			continue; +	while (address > PM_LEVEL_SIZE(domain->mode)) +		increase_address_space(domain, gfp); -		iommu = amd_iommu_rlookup_table[i]; -		if (!iommu) -			continue; +	level =  domain->mode - 1; +	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + +	while (level > end_lvl) { +		if (!IOMMU_PTE_PRESENT(*pte)) { +			page = (u64 *)get_zeroed_page(gfp); +			if (!page) +				return NULL; +			*pte = PM_LEVEL_PDE(level, virt_to_phys(page)); +		} -		iommu_queue_inv_dev_entry(iommu, i); -		iommu_completion_wait(iommu); +		level -= 1; + +		pte = IOMMU_PTE_PAGE(*pte); + +		if (pte_page && level == end_lvl) +			*pte_page = pte; + +	
	pte = &pte[PM_LEVEL_INDEX(level, address)];  	} + +	return pte;  } -static void reset_iommu_command_buffer(struct amd_iommu *iommu) +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, +		      unsigned long address, int map_size)  { -	pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); +	int level; +	u64 *pte; -	if (iommu->reset_in_progress) -		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); +	level =  domain->mode - 1; +	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; -	iommu->reset_in_progress = true; +	while (level > map_size) { +		if (!IOMMU_PTE_PRESENT(*pte)) +			return NULL; -	amd_iommu_reset_cmd_buffer(iommu); -	flush_all_devices_for_iommu(iommu); -	flush_all_domains_on_iommu(iommu); +		level -= 1; -	iommu->reset_in_progress = false; -} +		pte = IOMMU_PTE_PAGE(*pte); +		pte = &pte[PM_LEVEL_INDEX(level, address)]; -void amd_iommu_flush_all_devices(void) -{ -	flush_devices_by_domain(NULL); -} +		if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { +			pte = NULL; +			break; +		} +	} -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ +	return pte; +}  /*   * Generic mapping functions. It maps a physical address into a DMA @@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,  }  /* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. 
- */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ -	struct unity_map_entry *entry; -	int ret; - -	list_for_each_entry(entry, &amd_iommu_unity_map, list) { -		if (!iommu_for_unity_map(iommu, entry)) -			continue; -		ret = dma_ops_unity_map(iommu->default_dom, entry); -		if (ret) -			return ret; -	} - -	return 0; -} - -/*   * This function actually applies the mapping to the page table of the   * dma_ops domain.   */ @@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,  }  /* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ +	struct unity_map_entry *entry; +	int ret; + +	list_for_each_entry(entry, &amd_iommu_unity_map, list) { +		if (!iommu_for_unity_map(iommu, entry)) +			continue; +		ret = dma_ops_unity_map(iommu->default_dom, entry); +		if (ret) +			return ret; +	} + +	return 0; +} + +/* + * Inits the unity mappings required for a specific device + */  static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, @@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,   */  /* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges).   
*/ -static u64 *fetch_pte(struct protection_domain *domain, -		      unsigned long address, int map_size) +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, +				      unsigned long start_page, +				      unsigned int pages)  { -	int level; -	u64 *pte; - -	level =  domain->mode - 1; -	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - -	while (level > map_size) { -		if (!IOMMU_PTE_PRESENT(*pte)) -			return NULL; - -		level -= 1; +	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; -		pte = IOMMU_PTE_PAGE(*pte); -		pte = &pte[PM_LEVEL_INDEX(level, address)]; +	if (start_page + pages > last_page) +		pages = last_page - start_page; -		if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { -			pte = NULL; -			break; -		} +	for (i = start_page; i < start_page + pages; ++i) { +		int index = i / APERTURE_RANGE_PAGES; +		int page  = i % APERTURE_RANGE_PAGES; +		__set_bit(page, dom->aperture[index]->bitmap);  	} - -	return pte;  }  /* @@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain,   * aperture in case of dma_ops domain allocation or address allocation   * failure.   
*/ -static int alloc_new_range(struct amd_iommu *iommu, -			   struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct dma_ops_domain *dma_dom,  			   bool populate, gfp_t gfp)  {  	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; +	struct amd_iommu *iommu;  	int i;  #ifdef CONFIG_IOMMU_STRESS @@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu,  	dma_dom->aperture_size += APERTURE_RANGE_SIZE;  	/* Intialize the exclusion range if necessary */ -	if (iommu->exclusion_start && -	    iommu->exclusion_start >= dma_dom->aperture[index]->offset && -	    iommu->exclusion_start < dma_dom->aperture_size) { -		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; -		int pages = iommu_num_pages(iommu->exclusion_start, -					    iommu->exclusion_length, -					    PAGE_SIZE); -		dma_ops_reserve_addresses(dma_dom, startpage, pages); +	for_each_iommu(iommu) { +		if (iommu->exclusion_start && +		    iommu->exclusion_start >= dma_dom->aperture[index]->offset +		    && iommu->exclusion_start < dma_dom->aperture_size) { +			unsigned long startpage; +			int pages = iommu_num_pages(iommu->exclusion_start, +						    iommu->exclusion_length, +						    PAGE_SIZE); +			startpage = iommu->exclusion_start >> PAGE_SHIFT; +			dma_ops_reserve_addresses(dma_dom, startpage, pages); +		}  	}  	/* @@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,  	}  	if (unlikely(address == -1)) -		address = bad_dma_address; +		address = DMA_ERROR_CODE;  	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,   *   ****************************************************************************/ +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ +	unsigned long flags; + +	spin_lock_irqsave(&amd_iommu_pd_lock, flags); +	
list_add(&domain->list, &amd_iommu_pd_list); +	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain from the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ +	unsigned long flags; + +	spin_lock_irqsave(&amd_iommu_pd_lock, flags); +	list_del(&domain->list); +	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} +  static u16 domain_id_alloc(void)  {  	unsigned long flags; @@ -1000,26 +1191,6 @@ static void domain_id_free(int id)  	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);  } -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, -				      unsigned long start_page, -				      unsigned int pages) -{ -	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - -	if (start_page + pages > last_page) -		pages = last_page - start_page; - -	for (i = start_page; i < start_page + pages; ++i) { -		int index = i / APERTURE_RANGE_PAGES; -		int page  = i % APERTURE_RANGE_PAGES; -		__set_bit(page, dom->aperture[index]->bitmap); -	} -} -  static void free_pagetable(struct protection_domain *domain)  {  	int i, j; @@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)  	if (!dom)  		return; +	del_domain_from_list(&dom->domain); +  	free_pagetable(&dom->domain);  	for (i = 0; i < APERTURE_MAX_RANGES; ++i) { @@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)   * It also intializes the page table and the address allocator data   * structures required for the dma_ops interface   */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) +static struct dma_ops_domain *dma_ops_domain_alloc(void)  {  	struct dma_ops_domain *dma_dom; @@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)  	dma_dom->domain.id = domain_id_alloc();  	if 
(dma_dom->domain.id == 0)  		goto free_dma_dom; +	INIT_LIST_HEAD(&dma_dom->domain.dev_list);  	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;  	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);  	dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)  	dma_dom->need_flush = false;  	dma_dom->target_dev = 0xffff; -	if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) +	add_domain_to_list(&dma_dom->domain); + +	if (alloc_new_range(dma_dom, true, GFP_KERNEL))  		goto free_dma_dom;  	/* @@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain)  	return domain->flags & PD_DMA_OPS_MASK;  } -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(u16 devid) -{ -	struct protection_domain *dom; -	unsigned long flags; - -	read_lock_irqsave(&amd_iommu_devtable_lock, flags); -	dom = amd_iommu_pd_table[devid]; -	read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - -	return dom; -} -  static void set_dte_entry(u16 devid, struct protection_domain *domain)  {  	u64 pte_root = virt_to_phys(domain->pt_root); @@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain)  	amd_iommu_dev_table[devid].data[2] = domain->id;  	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);  	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); +} + +static void clear_dte_entry(u16 devid) +{ +	/* remove entry from the device table seen by the hardware */ +	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; +	amd_iommu_dev_table[devid].data[1] = 0; +	amd_iommu_dev_table[devid].data[2] = 0; -	amd_iommu_pd_table[devid] = domain; +	amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ +	struct iommu_dev_data 
*dev_data; +	struct amd_iommu *iommu; +	u16 devid; + +	devid    = get_device_id(dev); +	iommu    = amd_iommu_rlookup_table[devid]; +	dev_data = get_dev_data(dev); + +	/* Update data structures */ +	dev_data->domain = domain; +	list_add(&dev_data->list, &domain->dev_list); +	set_dte_entry(devid, domain); + +	/* Do reference counting */ +	domain->dev_iommu[iommu->index] += 1; +	domain->dev_cnt                 += 1; + +	/* Flush the DTE entry */ +	iommu_flush_device(dev); +} + +static void do_detach(struct device *dev) +{ +	struct iommu_dev_data *dev_data; +	struct amd_iommu *iommu; +	u16 devid; + +	devid    = get_device_id(dev); +	iommu    = amd_iommu_rlookup_table[devid]; +	dev_data = get_dev_data(dev); + +	/* decrease reference counters */ +	dev_data->domain->dev_iommu[iommu->index] -= 1; +	dev_data->domain->dev_cnt                 -= 1; + +	/* Update data structures */ +	dev_data->domain = NULL; +	list_del(&dev_data->list); +	clear_dte_entry(devid); + +	/* Flush the DTE entry */ +	iommu_flush_device(dev);  }  /*   * If a device is not yet associated with a domain, this function does   * assigns it visible for the hardware   */ -static void __attach_device(struct amd_iommu *iommu, -			    struct protection_domain *domain, -			    u16 devid) +static int __attach_device(struct device *dev, +			   struct protection_domain *domain)  { +	struct iommu_dev_data *dev_data, *alias_data; + +	dev_data   = get_dev_data(dev); +	alias_data = get_dev_data(dev_data->alias); + +	if (!alias_data) +		return -EINVAL; +  	/* lock domain */  	spin_lock(&domain->lock); -	/* update DTE entry */ -	set_dte_entry(devid, domain); +	/* Some sanity checks */ +	if (alias_data->domain != NULL && +	    alias_data->domain != domain) +		return -EBUSY; + +	if (dev_data->domain != NULL && +	    dev_data->domain != domain) +		return -EBUSY; -	domain->dev_cnt += 1; +	/* Do real assignment */ +	if (dev_data->alias != dev) { +		alias_data = get_dev_data(dev_data->alias); +		if (alias_data->domain == NULL) 
+			do_attach(dev_data->alias, domain); + +		atomic_inc(&alias_data->bind); +	} + +	if (dev_data->domain == NULL) +		do_attach(dev, domain); + +	atomic_inc(&dev_data->bind);  	/* ready */  	spin_unlock(&domain->lock); + +	return 0;  }  /*   * If a device is not yet associated with a domain, this function does   * assigns it visible for the hardware   */ -static void attach_device(struct amd_iommu *iommu, -			  struct protection_domain *domain, -			  u16 devid) +static int attach_device(struct device *dev, +			 struct protection_domain *domain)  {  	unsigned long flags; +	int ret;  	write_lock_irqsave(&amd_iommu_devtable_lock, flags); -	__attach_device(iommu, domain, devid); +	ret = __attach_device(dev, domain);  	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);  	/* @@ -1199,96 +1440,125 @@ static void attach_device(struct amd_iommu *iommu,  	 * left the caches in the IOMMU dirty. So we have to flush  	 * here to evict all dirty stuff.  	 */ -	iommu_queue_inv_dev_entry(iommu, devid); -	iommu_flush_tlb_pde(iommu, domain->id); +	iommu_flush_tlb_pde(domain); + +	return ret;  }  /*   * Removes a device from a protection domain (unlocked)   */ -static void __detach_device(struct protection_domain *domain, u16 devid) +static void __detach_device(struct device *dev)  { +	struct iommu_dev_data *dev_data = get_dev_data(dev); +	struct iommu_dev_data *alias_data; +	unsigned long flags; -	/* lock domain */ -	spin_lock(&domain->lock); +	BUG_ON(!dev_data->domain); -	/* remove domain from the lookup table */ -	amd_iommu_pd_table[devid] = NULL; +	spin_lock_irqsave(&dev_data->domain->lock, flags); -	/* remove entry from the device table seen by the hardware */ -	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; -	amd_iommu_dev_table[devid].data[1] = 0; -	amd_iommu_dev_table[devid].data[2] = 0; +	if (dev_data->alias != dev) { +		alias_data = get_dev_data(dev_data->alias); +		if (atomic_dec_and_test(&alias_data->bind)) +			do_detach(dev_data->alias); +	} -	/* 
decrease reference counter */ -	domain->dev_cnt -= 1; +	if (atomic_dec_and_test(&dev_data->bind)) +		do_detach(dev); -	/* ready */ -	spin_unlock(&domain->lock); +	spin_unlock_irqrestore(&dev_data->domain->lock, flags);  	/*  	 * If we run in passthrough mode the device must be assigned to the  	 * passthrough domain if it is detached from any other domain  	 */ -	if (iommu_pass_through) { -		struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; -		__attach_device(iommu, pt_domain, devid); -	} +	if (iommu_pass_through && dev_data->domain == NULL) +		__attach_device(dev, pt_domain);  }  /*   * Removes a device from a protection domain (with devtable_lock held)   */ -static void detach_device(struct protection_domain *domain, u16 devid) +static void detach_device(struct device *dev)  {  	unsigned long flags;  	/* lock device table */  	write_lock_irqsave(&amd_iommu_devtable_lock, flags); -	__detach_device(domain, devid); +	__detach_device(dev);  	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);  } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. 
+ */ +static struct protection_domain *domain_for_device(struct device *dev) +{ +	struct protection_domain *dom; +	struct iommu_dev_data *dev_data, *alias_data; +	unsigned long flags; +	u16 devid, alias; + +	devid      = get_device_id(dev); +	alias      = amd_iommu_alias_table[devid]; +	dev_data   = get_dev_data(dev); +	alias_data = get_dev_data(dev_data->alias); +	if (!alias_data) +		return NULL; + +	read_lock_irqsave(&amd_iommu_devtable_lock, flags); +	dom = dev_data->domain; +	if (dom == NULL && +	    alias_data->domain != NULL) { +		__attach_device(dev, alias_data->domain); +		dom = alias_data->domain; +	} + +	read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + +	return dom; +} +  static int device_change_notifier(struct notifier_block *nb,  				  unsigned long action, void *data)  {  	struct device *dev = data; -	struct pci_dev *pdev = to_pci_dev(dev); -	u16 devid = calc_devid(pdev->bus->number, pdev->devfn); +	u16 devid;  	struct protection_domain *domain;  	struct dma_ops_domain *dma_domain;  	struct amd_iommu *iommu;  	unsigned long flags; -	if (devid > amd_iommu_last_bdf) -		goto out; - -	devid = amd_iommu_alias_table[devid]; - -	iommu = amd_iommu_rlookup_table[devid]; -	if (iommu == NULL) -		goto out; - -	domain = domain_for_device(devid); +	if (!check_device(dev)) +		return 0; -	if (domain && !dma_ops_domain(domain)) -		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " -			  "to a non-dma-ops domain\n", dev_name(dev)); +	devid  = get_device_id(dev); +	iommu  = amd_iommu_rlookup_table[devid];  	switch (action) {  	case BUS_NOTIFY_UNBOUND_DRIVER: + +		domain = domain_for_device(dev); +  		if (!domain)  			goto out;  		if (iommu_pass_through)  			break; -		detach_device(domain, devid); +		detach_device(dev);  		break;  	case BUS_NOTIFY_ADD_DEVICE: + +		iommu_init_device(dev); + +		domain = domain_for_device(dev); +  		/* allocate a protection domain if a device is added */  		dma_domain = find_protection_domain(devid);  		if (dma_domain)  			
goto out; -		dma_domain = dma_ops_domain_alloc(iommu); +		dma_domain = dma_ops_domain_alloc();  		if (!dma_domain)  			goto out;  		dma_domain->target_dev = devid; @@ -1298,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb,  		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);  		break; +	case BUS_NOTIFY_DEL_DEVICE: + +		iommu_uninit_device(dev); +  	default:  		goto out;  	} -	iommu_queue_inv_dev_entry(iommu, devid); +	iommu_flush_device(dev);  	iommu_completion_wait(iommu);  out: @@ -1320,106 +1594,46 @@ static struct notifier_block device_nb = {   *****************************************************************************/  /* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ -	if (!dev || !dev->dma_mask) -		return false; - -	return true; -} - -/* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ -	struct dma_ops_domain *entry, *ret = NULL; -	unsigned long flags; - -	if (list_empty(&iommu_pd_list)) -		return NULL; - -	spin_lock_irqsave(&iommu_pd_list_lock, flags); - -	list_for_each_entry(entry, &iommu_pd_list, list) { -		if (entry->target_dev == devid) { -			ret = entry; -			break; -		} -	} - -	spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - -	return ret; -} - -/*   * In the dma_ops path we only have the struct device. This function   * finds the corresponding IOMMU, the protection domain and the   * requestor id for a given device.   * If the device is not yet associated with a domain this is also done   * in this function.   
*/ -static int get_device_resources(struct device *dev, -				struct amd_iommu **iommu, -				struct protection_domain **domain, -				u16 *bdf) +static struct protection_domain *get_domain(struct device *dev)  { +	struct protection_domain *domain;  	struct dma_ops_domain *dma_dom; -	struct pci_dev *pcidev; -	u16 _bdf; - -	*iommu = NULL; -	*domain = NULL; -	*bdf = 0xffff; - -	if (dev->bus != &pci_bus_type) -		return 0; +	u16 devid = get_device_id(dev); -	pcidev = to_pci_dev(dev); -	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - -	/* device not translated by any IOMMU in the system? */ -	if (_bdf > amd_iommu_last_bdf) -		return 0; +	if (!check_device(dev)) +		return ERR_PTR(-EINVAL); -	*bdf = amd_iommu_alias_table[_bdf]; +	domain = domain_for_device(dev); +	if (domain != NULL && !dma_ops_domain(domain)) +		return ERR_PTR(-EBUSY); -	*iommu = amd_iommu_rlookup_table[*bdf]; -	if (*iommu == NULL) -		return 0; -	*domain = domain_for_device(*bdf); -	if (*domain == NULL) { -		dma_dom = find_protection_domain(*bdf); -		if (!dma_dom) -			dma_dom = (*iommu)->default_dom; -		*domain = &dma_dom->domain; -		attach_device(*iommu, *domain, *bdf); -		DUMP_printk("Using protection domain %d for device %s\n", -			    (*domain)->id, dev_name(dev)); -	} +	if (domain != NULL) +		return domain; -	if (domain_for_device(_bdf) == NULL) -		attach_device(*iommu, *domain, _bdf); +	/* Device not bount yet - bind it */ +	dma_dom = find_protection_domain(devid); +	if (!dma_dom) +		dma_dom = amd_iommu_rlookup_table[devid]->default_dom; +	attach_device(dev, &dma_dom->domain); +	DUMP_printk("Using protection domain %d for device %s\n", +		    dma_dom->domain.id, dev_name(dev)); -	return 1; +	return &dma_dom->domain;  }  static void update_device_table(struct protection_domain *domain)  { -	unsigned long flags; -	int i; +	struct iommu_dev_data *dev_data; -	for (i = 0; i <= amd_iommu_last_bdf; ++i) { -		if (amd_iommu_pd_table[i] != domain) -			continue; -		
write_lock_irqsave(&amd_iommu_devtable_lock, flags); -		set_dte_entry(i, domain); -		write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +	list_for_each_entry(dev_data, &domain->dev_list, list) { +		u16 devid = get_device_id(dev_data->dev); +		set_dte_entry(devid, domain);  	}  } @@ -1429,76 +1643,13 @@ static void update_domain(struct protection_domain *domain)  		return;  	update_device_table(domain); -	flush_devices_by_domain(domain); -	iommu_flush_domain(domain->id); +	iommu_flush_domain_devices(domain); +	iommu_flush_tlb_pde(domain);  	domain->updated = false;  }  /* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, -				   gfp_t gfp) -{ -	u64 *pte; - -	if (domain->mode == PAGE_MODE_6_LEVEL) -		/* address space already 64 bit large */ -		return false; - -	pte = (void *)get_zeroed_page(gfp); -	if (!pte) -		return false; - -	*pte             = PM_LEVEL_PDE(domain->mode, -					virt_to_phys(domain->pt_root)); -	domain->pt_root  = pte; -	domain->mode    += 1; -	domain->updated  = true; - -	return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, -		      unsigned long address, -		      int end_lvl, -		      u64 **pte_page, -		      gfp_t gfp) -{ -	u64 *pte, *page; -	int level; - -	while (address > PM_LEVEL_SIZE(domain->mode)) -		increase_address_space(domain, gfp); - -	level =  domain->mode - 1; -	pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - -	while (level > end_lvl) { -		if (!IOMMU_PTE_PRESENT(*pte)) { -			page = (u64 *)get_zeroed_page(gfp); -			if (!page) -				return NULL; -			*pte = PM_LEVEL_PDE(level, virt_to_phys(page)); -		} - -		level -= 1; - -		pte = IOMMU_PTE_PAGE(*pte); - -		if (pte_page && level == end_lvl) -			*pte_page = pte; - -		pte = &pte[PM_LEVEL_INDEX(level, address)]; -	} - -	return pte; -} - -/*   * This function 
fetches the PTE for a given address in the aperture   */  static u64* dma_ops_get_pte(struct dma_ops_domain *dom, @@ -1528,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,   * This is the generic map function. It maps one 4kb page at paddr to   * the given address in the DMA address space for the domain.   */ -static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, -				     struct dma_ops_domain *dom, +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,  				     unsigned long address,  				     phys_addr_t paddr,  				     int direction) @@ -1542,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,  	pte  = dma_ops_get_pte(dom, address);  	if (!pte) -		return bad_dma_address; +		return DMA_ERROR_CODE;  	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1563,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,  /*   * The generic unmapping function for on page in the DMA address space.   */ -static void dma_ops_domain_unmap(struct amd_iommu *iommu, -				 struct dma_ops_domain *dom, +static void dma_ops_domain_unmap(struct dma_ops_domain *dom,  				 unsigned long address)  {  	struct aperture_range *aperture; @@ -1595,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,   * Must be called with the domain lock held.   
*/  static dma_addr_t __map_single(struct device *dev, -			       struct amd_iommu *iommu,  			       struct dma_ops_domain *dma_dom,  			       phys_addr_t paddr,  			       size_t size, @@ -1623,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev,  retry:  	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,  					  dma_mask); -	if (unlikely(address == bad_dma_address)) { +	if (unlikely(address == DMA_ERROR_CODE)) {  		/*  		 * setting next_address here will let the address  		 * allocator only scan the new allocated range in the @@ -1631,7 +1779,7 @@ retry:  		 */  		dma_dom->next_address = dma_dom->aperture_size; -		if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) +		if (alloc_new_range(dma_dom, false, GFP_ATOMIC))  			goto out;  		/* @@ -1643,8 +1791,8 @@ retry:  	start = address;  	for (i = 0; i < pages; ++i) { -		ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); -		if (ret == bad_dma_address) +		ret = dma_ops_domain_map(dma_dom, start, paddr, dir); +		if (ret == DMA_ERROR_CODE)  			goto out_unmap;  		paddr += PAGE_SIZE; @@ -1655,10 +1803,10 @@ retry:  	ADD_STATS_COUNTER(alloced_io_mem, size);  	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { -		iommu_flush_tlb(iommu, dma_dom->domain.id); +		iommu_flush_tlb(&dma_dom->domain);  		dma_dom->need_flush = false; -	} else if (unlikely(iommu_has_npcache(iommu))) -		iommu_flush_pages(iommu, dma_dom->domain.id, address, size); +	} else if (unlikely(amd_iommu_np_cache)) +		iommu_flush_pages(&dma_dom->domain, address, size);  out:  	return address; @@ -1667,20 +1815,19 @@ out_unmap:  	for (--i; i >= 0; --i) {  		start -= PAGE_SIZE; -		dma_ops_domain_unmap(iommu, dma_dom, start); +		dma_ops_domain_unmap(dma_dom, start);  	}  	dma_ops_free_addresses(dma_dom, address, pages); -	return bad_dma_address; +	return DMA_ERROR_CODE;  }  /*   * Does the reverse of the __map_single function. 
Must be called with   * the domain lock held too   */ -static void __unmap_single(struct amd_iommu *iommu, -			   struct dma_ops_domain *dma_dom, +static void __unmap_single(struct dma_ops_domain *dma_dom,  			   dma_addr_t dma_addr,  			   size_t size,  			   int dir) @@ -1688,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu,  	dma_addr_t i, start;  	unsigned int pages; -	if ((dma_addr == bad_dma_address) || +	if ((dma_addr == DMA_ERROR_CODE) ||  	    (dma_addr + size > dma_dom->aperture_size))  		return; @@ -1697,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu,  	start = dma_addr;  	for (i = 0; i < pages; ++i) { -		dma_ops_domain_unmap(iommu, dma_dom, start); +		dma_ops_domain_unmap(dma_dom, start);  		start += PAGE_SIZE;  	} @@ -1706,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu,  	dma_ops_free_addresses(dma_dom, dma_addr, pages);  	if (amd_iommu_unmap_flush || dma_dom->need_flush) { -		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); +		iommu_flush_pages(&dma_dom->domain, dma_addr, size);  		dma_dom->need_flush = false;  	}  } @@ -1720,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page,  			   struct dma_attrs *attrs)  {  	unsigned long flags; -	struct amd_iommu *iommu;  	struct protection_domain *domain; -	u16 devid;  	dma_addr_t addr;  	u64 dma_mask;  	phys_addr_t paddr = page_to_phys(page) + offset;  	INC_STATS_COUNTER(cnt_map_single); -	if (!check_device(dev)) -		return bad_dma_address; - -	dma_mask = *dev->dma_mask; - -	get_device_resources(dev, &iommu, &domain, &devid); - -	if (iommu == NULL || domain == NULL) -		/* device not handled by any AMD IOMMU */ +	domain = get_domain(dev); +	if (PTR_ERR(domain) == -EINVAL)  		return (dma_addr_t)paddr; +	else if (IS_ERR(domain)) +		return DMA_ERROR_CODE; -	if (!dma_ops_domain(domain)) -		return bad_dma_address; +	dma_mask = *dev->dma_mask;  	spin_lock_irqsave(&domain->lock, flags); -	addr = __map_single(dev, iommu, 
domain->priv, paddr, size, dir, false, + +	addr = __map_single(dev, domain->priv, paddr, size, dir, false,  			    dma_mask); -	if (addr == bad_dma_address) +	if (addr == DMA_ERROR_CODE)  		goto out; -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  out:  	spin_unlock_irqrestore(&domain->lock, flags); @@ -1764,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,  		       enum dma_data_direction dir, struct dma_attrs *attrs)  {  	unsigned long flags; -	struct amd_iommu *iommu;  	struct protection_domain *domain; -	u16 devid;  	INC_STATS_COUNTER(cnt_unmap_single); -	if (!check_device(dev) || -	    !get_device_resources(dev, &iommu, &domain, &devid)) -		/* device not handled by any AMD IOMMU */ -		return; - -	if (!dma_ops_domain(domain)) +	domain = get_domain(dev); +	if (IS_ERR(domain))  		return;  	spin_lock_irqsave(&domain->lock, flags); -	__unmap_single(iommu, domain->priv, dma_addr, size, dir); +	__unmap_single(domain->priv, dma_addr, size, dir); -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  	spin_unlock_irqrestore(&domain->lock, flags);  } @@ -1814,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,  		  struct dma_attrs *attrs)  {  	unsigned long flags; -	struct amd_iommu *iommu;  	struct protection_domain *domain; -	u16 devid;  	int i;  	struct scatterlist *s;  	phys_addr_t paddr; @@ -1825,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,  	INC_STATS_COUNTER(cnt_map_sg); -	if (!check_device(dev)) +	domain = get_domain(dev); +	if (PTR_ERR(domain) == -EINVAL) +		return map_sg_no_iommu(dev, sglist, nelems, dir); +	else if (IS_ERR(domain))  		return 0;  	dma_mask = *dev->dma_mask; -	get_device_resources(dev, &iommu, &domain, &devid); - -	if (!iommu || !domain) -		return map_sg_no_iommu(dev, sglist, nelems, dir); - -	if (!dma_ops_domain(domain)) -		return 0; -  	spin_lock_irqsave(&domain->lock, flags);  	for_each_sg(sglist, s, nelems, 
i) {  		paddr = sg_phys(s); -		s->dma_address = __map_single(dev, iommu, domain->priv, +		s->dma_address = __map_single(dev, domain->priv,  					      paddr, s->length, dir, false,  					      dma_mask); @@ -1854,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,  			goto unmap;  	} -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  out:  	spin_unlock_irqrestore(&domain->lock, flags); @@ -1863,7 +1990,7 @@ out:  unmap:  	for_each_sg(sglist, s, mapped_elems, i) {  		if (s->dma_address) -			__unmap_single(iommu, domain->priv, s->dma_address, +			__unmap_single(domain->priv, s->dma_address,  				       s->dma_length, dir);  		s->dma_address = s->dma_length = 0;  	} @@ -1882,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,  		     struct dma_attrs *attrs)  {  	unsigned long flags; -	struct amd_iommu *iommu;  	struct protection_domain *domain;  	struct scatterlist *s; -	u16 devid;  	int i;  	INC_STATS_COUNTER(cnt_unmap_sg); -	if (!check_device(dev) || -	    !get_device_resources(dev, &iommu, &domain, &devid)) -		return; - -	if (!dma_ops_domain(domain)) +	domain = get_domain(dev); +	if (IS_ERR(domain))  		return;  	spin_lock_irqsave(&domain->lock, flags);  	for_each_sg(sglist, s, nelems, i) { -		__unmap_single(iommu, domain->priv, s->dma_address, +		__unmap_single(domain->priv, s->dma_address,  			       s->dma_length, dir);  		s->dma_address = s->dma_length = 0;  	} -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  	spin_unlock_irqrestore(&domain->lock, flags);  } @@ -1918,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size,  {  	unsigned long flags;  	void *virt_addr; -	struct amd_iommu *iommu;  	struct protection_domain *domain; -	u16 devid;  	phys_addr_t paddr;  	u64 dma_mask = dev->coherent_dma_mask;  	INC_STATS_COUNTER(cnt_alloc_coherent); -	if (!check_device(dev)) +	domain = get_domain(dev); +	if (PTR_ERR(domain) == -EINVAL) { +		virt_addr = (void 
*)__get_free_pages(flag, get_order(size)); +		*dma_addr = __pa(virt_addr); +		return virt_addr; +	} else if (IS_ERR(domain))  		return NULL; -	if (!get_device_resources(dev, &iommu, &domain, &devid)) -		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); +	dma_mask  = dev->coherent_dma_mask; +	flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); +	flag     |= __GFP_ZERO; -	flag |= __GFP_ZERO;  	virt_addr = (void *)__get_free_pages(flag, get_order(size));  	if (!virt_addr)  		return NULL;  	paddr = virt_to_phys(virt_addr); -	if (!iommu || !domain) { -		*dma_addr = (dma_addr_t)paddr; -		return virt_addr; -	} - -	if (!dma_ops_domain(domain)) -		goto out_free; -  	if (!dma_mask)  		dma_mask = *dev->dma_mask;  	spin_lock_irqsave(&domain->lock, flags); -	*dma_addr = __map_single(dev, iommu, domain->priv, paddr, +	*dma_addr = __map_single(dev, domain->priv, paddr,  				 size, DMA_BIDIRECTIONAL, true, dma_mask); -	if (*dma_addr == bad_dma_address) { +	if (*dma_addr == DMA_ERROR_CODE) {  		spin_unlock_irqrestore(&domain->lock, flags);  		goto out_free;  	} -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  	spin_unlock_irqrestore(&domain->lock, flags); @@ -1980,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size,  			  void *virt_addr, dma_addr_t dma_addr)  {  	unsigned long flags; -	struct amd_iommu *iommu;  	struct protection_domain *domain; -	u16 devid;  	INC_STATS_COUNTER(cnt_free_coherent); -	if (!check_device(dev)) -		return; - -	get_device_resources(dev, &iommu, &domain, &devid); - -	if (!iommu || !domain) -		goto free_mem; - -	if (!dma_ops_domain(domain)) +	domain = get_domain(dev); +	if (IS_ERR(domain))  		goto free_mem;  	spin_lock_irqsave(&domain->lock, flags); -	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); +	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); -	iommu_completion_wait(iommu); +	iommu_flush_complete(domain);  	spin_unlock_irqrestore(&domain->lock, flags); @@ -2015,22 
+2123,7 @@ free_mem:   */  static int amd_iommu_dma_supported(struct device *dev, u64 mask)  { -	u16 bdf; -	struct pci_dev *pcidev; - -	/* No device or no PCI device */ -	if (!dev || dev->bus != &pci_bus_type) -		return 0; - -	pcidev = to_pci_dev(dev); - -	bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - -	/* Out of our scope? */ -	if (bdf > amd_iommu_last_bdf) -		return 0; - -	return 1; +	return check_device(dev);  }  /* @@ -2044,25 +2137,30 @@ static void prealloc_protection_domains(void)  {  	struct pci_dev *dev = NULL;  	struct dma_ops_domain *dma_dom; -	struct amd_iommu *iommu;  	u16 devid;  	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { -		devid = calc_devid(dev->bus->number, dev->devfn); -		if (devid > amd_iommu_last_bdf) -			continue; -		devid = amd_iommu_alias_table[devid]; -		if (domain_for_device(devid)) + +		/* Do we handle this device? */ +		if (!check_device(&dev->dev))  			continue; -		iommu = amd_iommu_rlookup_table[devid]; -		if (!iommu) + +		iommu_init_device(&dev->dev); + +		/* Is there already any domain for it? */ +		if (domain_for_device(&dev->dev))  			continue; -		dma_dom = dma_ops_domain_alloc(iommu); + +		devid = get_device_id(&dev->dev); + +		dma_dom = dma_ops_domain_alloc();  		if (!dma_dom)  			continue;  		init_unity_mappings_for_device(dma_dom, devid);  		dma_dom->target_dev = devid; +		attach_device(&dev->dev, &dma_dom->domain); +  		list_add_tail(&dma_dom->list, &iommu_pd_list);  	}  } @@ -2091,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void)  	 * protection domain will be assigned to the default one.  	 
*/  	for_each_iommu(iommu) { -		iommu->default_dom = dma_ops_domain_alloc(iommu); +		iommu->default_dom = dma_ops_domain_alloc();  		if (iommu->default_dom == NULL)  			return -ENOMEM;  		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; @@ -2101,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void)  	}  	/* -	 * If device isolation is enabled, pre-allocate the protection -	 * domains for each device. +	 * Pre-allocate the protection domains for each device.  	 */ -	if (amd_iommu_isolate) -		prealloc_protection_domains(); +	prealloc_protection_domains();  	iommu_detected = 1; -	force_iommu = 1; -	bad_dma_address = 0; +	swiotlb = 0;  #ifdef CONFIG_GART_IOMMU  	gart_iommu_aperture_disabled = 1;  	gart_iommu_aperture = 0; @@ -2148,14 +2243,17 @@ free_domains:  static void cleanup_domain(struct protection_domain *domain)  { +	struct iommu_dev_data *dev_data, *next;  	unsigned long flags; -	u16 devid;  	write_lock_irqsave(&amd_iommu_devtable_lock, flags); -	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) -		if (amd_iommu_pd_table[devid] == domain) -			__detach_device(domain, devid); +	list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { +		struct device *dev = dev_data->dev; + +		do_detach(dev); +		atomic_set(&dev_data->bind, 0); +	}  	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);  } @@ -2165,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain)  	if (!domain)  		return; +	del_domain_from_list(domain); +  	if (domain->id)  		domain_id_free(domain->id); @@ -2183,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void)  	domain->id = domain_id_alloc();  	if (!domain->id)  		goto out_err; +	INIT_LIST_HEAD(&domain->dev_list); + +	add_domain_to_list(domain);  	return domain; @@ -2239,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)  static void amd_iommu_detach_device(struct iommu_domain *dom,  				    struct device *dev)  { -	struct protection_domain *domain = 
dom->priv; +	struct iommu_dev_data *dev_data = dev->archdata.iommu;  	struct amd_iommu *iommu; -	struct pci_dev *pdev;  	u16 devid; -	if (dev->bus != &pci_bus_type) +	if (!check_device(dev))  		return; -	pdev = to_pci_dev(dev); - -	devid = calc_devid(pdev->bus->number, pdev->devfn); +	devid = get_device_id(dev); -	if (devid > 0) -		detach_device(domain, devid); +	if (dev_data->domain != NULL) +		detach_device(dev);  	iommu = amd_iommu_rlookup_table[devid];  	if (!iommu)  		return; -	iommu_queue_inv_dev_entry(iommu, devid); +	iommu_flush_device(dev);  	iommu_completion_wait(iommu);  } @@ -2266,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,  				   struct device *dev)  {  	struct protection_domain *domain = dom->priv; -	struct protection_domain *old_domain; +	struct iommu_dev_data *dev_data;  	struct amd_iommu *iommu; -	struct pci_dev *pdev; +	int ret;  	u16 devid; -	if (dev->bus != &pci_bus_type) +	if (!check_device(dev))  		return -EINVAL; -	pdev = to_pci_dev(dev); +	dev_data = dev->archdata.iommu; -	devid = calc_devid(pdev->bus->number, pdev->devfn); - -	if (devid >= amd_iommu_last_bdf || -			devid != amd_iommu_alias_table[devid]) -		return -EINVAL; +	devid = get_device_id(dev);  	iommu = amd_iommu_rlookup_table[devid];  	if (!iommu)  		return -EINVAL; -	old_domain = domain_for_device(devid); -	if (old_domain) -		detach_device(old_domain, devid); +	if (dev_data->domain) +		detach_device(dev); -	attach_device(iommu, domain, devid); +	ret = attach_device(dev, domain);  	iommu_completion_wait(iommu); -	return 0; +	return ret;  }  static int amd_iommu_map_range(struct iommu_domain *dom, @@ -2340,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,  		iova  += PAGE_SIZE;  	} -	iommu_flush_domain(domain->id); +	iommu_flush_tlb_pde(domain);  }  static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -2391,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = {  int __init amd_iommu_init_passthrough(void)  { +	
struct amd_iommu *iommu;  	struct pci_dev *dev = NULL; -	u16 devid, devid2; +	u16 devid;  	/* allocate passthroug domain */  	pt_domain = protection_domain_alloc(); @@ -2402,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void)  	pt_domain->mode |= PAGE_MODE_NONE;  	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { -		struct amd_iommu *iommu; -		devid = calc_devid(dev->bus->number, dev->devfn); -		if (devid > amd_iommu_last_bdf) +		if (!check_device(&dev->dev))  			continue; -		devid2 = amd_iommu_alias_table[devid]; +		devid = get_device_id(&dev->dev); -		iommu = amd_iommu_rlookup_table[devid2]; +		iommu = amd_iommu_rlookup_table[devid];  		if (!iommu)  			continue; -		__attach_device(iommu, pt_domain, devid); -		__attach_device(iommu, pt_domain, devid2); +		attach_device(&dev->dev, pt_domain);  	}  	pr_info("AMD-Vi: Initialized for Passthrough Mode\n");  |