Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--   drivers/gpu/drm/i915/i915_gem.c | 528
1 file changed, 286 insertions(+), 242 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e55badb2d86..1f441f5c240 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -58,6 +58,7 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);  static int i915_gem_inactive_shrink(struct shrinker *shrinker,  				    struct shrink_control *sc); +static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);  /* some bookkeeping */  static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -258,73 +259,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)  		obj->tiling_mode != I915_TILING_NONE;  } -static inline void -slow_shmem_copy(struct page *dst_page, -		int dst_offset, -		struct page *src_page, -		int src_offset, -		int length) -{ -	char *dst_vaddr, *src_vaddr; - -	dst_vaddr = kmap(dst_page); -	src_vaddr = kmap(src_page); - -	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length); - -	kunmap(src_page); -	kunmap(dst_page); -} - -static inline void -slow_shmem_bit17_copy(struct page *gpu_page, -		      int gpu_offset, -		      struct page *cpu_page, -		      int cpu_offset, -		      int length, -		      int is_read) -{ -	char *gpu_vaddr, *cpu_vaddr; - -	/* Use the unswizzled path if this page isn't affected. */ -	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { -		if (is_read) -			return slow_shmem_copy(cpu_page, cpu_offset, -					       gpu_page, gpu_offset, length); -		else -			return slow_shmem_copy(gpu_page, gpu_offset, -					       cpu_page, cpu_offset, length); -	} - -	gpu_vaddr = kmap(gpu_page); -	cpu_vaddr = kmap(cpu_page); - -	/* Copy the data, XORing A6 with A17 (1). The user already knows he's -	 * XORing with the other bits (A9 for Y, A9 and A10 for X) -	 */ -	while (length > 0) { -		int cacheline_end = ALIGN(gpu_offset + 1, 64); -		int this_length = min(cacheline_end - gpu_offset, length); -		int swizzled_gpu_offset = gpu_offset ^ 64; - -		if (is_read) { -			memcpy(cpu_vaddr + cpu_offset, -			       gpu_vaddr + swizzled_gpu_offset, -			       this_length); -		} else { -			memcpy(gpu_vaddr + swizzled_gpu_offset, -			       cpu_vaddr + cpu_offset, -			       this_length); -		} -		cpu_offset += this_length; -		gpu_offset += this_length; -		length -= this_length; -	} - -	kunmap(cpu_page); -	kunmap(gpu_page); -} -  /**   * This is the fast shmem pread path, which attempts to copy_from_user directly   * from the backing pages of the object to the user's address space.  
On a @@ -385,6 +319,58 @@ i915_gem_shmem_pread_fast(struct drm_device *dev,  	return 0;  } +static inline int +__copy_to_user_swizzled(char __user *cpu_vaddr, +			const char *gpu_vaddr, int gpu_offset, +			int length) +{ +	int ret, cpu_offset = 0; + +	while (length > 0) { +		int cacheline_end = ALIGN(gpu_offset + 1, 64); +		int this_length = min(cacheline_end - gpu_offset, length); +		int swizzled_gpu_offset = gpu_offset ^ 64; + +		ret = __copy_to_user(cpu_vaddr + cpu_offset, +				     gpu_vaddr + swizzled_gpu_offset, +				     this_length); +		if (ret) +			return ret + length; + +		cpu_offset += this_length; +		gpu_offset += this_length; +		length -= this_length; +	} + +	return 0; +} + +static inline int +__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset, +			  const char *cpu_vaddr, +			  int length) +{ +	int ret, cpu_offset = 0; + +	while (length > 0) { +		int cacheline_end = ALIGN(gpu_offset + 1, 64); +		int this_length = min(cacheline_end - gpu_offset, length); +		int swizzled_gpu_offset = gpu_offset ^ 64; + +		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, +				       cpu_vaddr + cpu_offset, +				       this_length); +		if (ret) +			return ret + length; + +		cpu_offset += this_length; +		gpu_offset += this_length; +		length -= this_length; +	} + +	return 0; +} +  /**   * This is the fallback shmem pread path, which allocates temporary storage   * in kernel space to copy_to_user into outside of the struct_mutex, so we @@ -398,72 +384,34 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,  			  struct drm_file *file)  {  	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; -	struct mm_struct *mm = current->mm; -	struct page **user_pages; +	char __user *user_data;  	ssize_t remain; -	loff_t offset, pinned_pages, i; -	loff_t first_data_page, last_data_page, num_pages; -	int shmem_page_offset; -	int data_page_index, data_page_offset; -	int page_length; -	int ret; -	uint64_t data_ptr = args->data_ptr; -	int do_bit17_swizzling; +	loff_t offset; +	int shmem_page_offset, page_length, ret; +	int obj_do_bit17_swizzling, page_do_bit17_swizzling; +	user_data = (char __user *) (uintptr_t) args->data_ptr;  	remain = args->size; -	/* Pin the user pages containing the data.  We can't fault while -	 * holding the struct mutex, yet we want to hold it while -	 * dereferencing the user data. -	 */ -	first_data_page = data_ptr / PAGE_SIZE; -	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; -	num_pages = last_data_page - first_data_page + 1; +	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); -	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); -	if (user_pages == NULL) -		return -ENOMEM; +	offset = args->offset;  	mutex_unlock(&dev->struct_mutex); -	down_read(&mm->mmap_sem); -	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, -				      num_pages, 1, 0, user_pages, NULL); -	up_read(&mm->mmap_sem); -	mutex_lock(&dev->struct_mutex); -	if (pinned_pages < num_pages) { -		ret = -EFAULT; -		goto out; -	} - -	ret = i915_gem_object_set_cpu_read_domain_range(obj, -							args->offset, -							args->size); -	if (ret) -		goto out; - -	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - -	offset = args->offset;  	while (remain > 0) {  		struct page *page; +		char *vaddr;  		/* Operation in this page  		 *  		 * shmem_page_offset = offset within page in shmem file -		 * data_page_index = page number in get_user_pages return -		 * data_page_offset = offset with data_page_index page.  		 
* page_length = bytes to copy for this page  		 */  		shmem_page_offset = offset_in_page(offset); -		data_page_index = data_ptr / PAGE_SIZE - first_data_page; -		data_page_offset = offset_in_page(data_ptr); -  		page_length = remain;  		if ((shmem_page_offset + page_length) > PAGE_SIZE)  			page_length = PAGE_SIZE - shmem_page_offset; -		if ((data_page_offset + page_length) > PAGE_SIZE) -			page_length = PAGE_SIZE - data_page_offset;  		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);  		if (IS_ERR(page)) { @@ -471,36 +419,38 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,  			goto out;  		} -		if (do_bit17_swizzling) { -			slow_shmem_bit17_copy(page, -					      shmem_page_offset, -					      user_pages[data_page_index], -					      data_page_offset, -					      page_length, -					      1); -		} else { -			slow_shmem_copy(user_pages[data_page_index], -					data_page_offset, -					page, -					shmem_page_offset, -					page_length); -		} +		page_do_bit17_swizzling = obj_do_bit17_swizzling && +			(page_to_phys(page) & (1 << 17)) != 0; + +		vaddr = kmap(page); +		if (page_do_bit17_swizzling) +			ret = __copy_to_user_swizzled(user_data, +						      vaddr, shmem_page_offset, +						      page_length); +		else +			ret = __copy_to_user(user_data, +					     vaddr + shmem_page_offset, +					     page_length); +		kunmap(page);  		mark_page_accessed(page);  		page_cache_release(page); +		if (ret) { +			ret = -EFAULT; +			goto out; +		} +  		remain -= page_length; -		data_ptr += page_length; +		user_data += page_length;  		offset += page_length;  	}  out: -	for (i = 0; i < pinned_pages; i++) { -		SetPageDirty(user_pages[i]); -		mark_page_accessed(user_pages[i]); -		page_cache_release(user_pages[i]); -	} -	drm_free_large(user_pages); +	mutex_lock(&dev->struct_mutex); +	/* Fixup: Kill any reinstated backing storage pages */ +	if (obj->madv == __I915_MADV_PURGED) +		i915_gem_object_truncate(obj);  	return ret;  } @@ -841,71 +791,36 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,  			   struct drm_file *file)  {  	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; -	struct mm_struct *mm = current->mm; -	struct page **user_pages;  	ssize_t remain; -	loff_t offset, pinned_pages, i; -	loff_t first_data_page, last_data_page, num_pages; -	int shmem_page_offset; -	int data_page_index,  data_page_offset; -	int page_length; -	int ret; -	uint64_t data_ptr = args->data_ptr; -	int do_bit17_swizzling; +	loff_t offset; +	char __user *user_data; +	int shmem_page_offset, page_length, ret; +	int obj_do_bit17_swizzling, page_do_bit17_swizzling; +	user_data = (char __user *) (uintptr_t) args->data_ptr;  	remain = args->size; -	/* Pin the user pages containing the data.  We can't fault while -	 * holding the struct mutex, and all of the pwrite implementations -	 * want to hold it while dereferencing the user data. 
-	 */ -	first_data_page = data_ptr / PAGE_SIZE; -	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; -	num_pages = last_data_page - first_data_page + 1; - -	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); -	if (user_pages == NULL) -		return -ENOMEM; - -	mutex_unlock(&dev->struct_mutex); -	down_read(&mm->mmap_sem); -	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, -				      num_pages, 0, 0, user_pages, NULL); -	up_read(&mm->mmap_sem); -	mutex_lock(&dev->struct_mutex); -	if (pinned_pages < num_pages) { -		ret = -EFAULT; -		goto out; -	} - -	ret = i915_gem_object_set_to_cpu_domain(obj, 1); -	if (ret) -		goto out; - -	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); +	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);  	offset = args->offset;  	obj->dirty = 1; +	mutex_unlock(&dev->struct_mutex); +  	while (remain > 0) {  		struct page *page; +		char *vaddr;  		/* Operation in this page  		 *  		 * shmem_page_offset = offset within page in shmem file -		 * data_page_index = page number in get_user_pages return -		 * data_page_offset = offset with data_page_index page.  		 * page_length = bytes to copy for this page  		 */  		shmem_page_offset = offset_in_page(offset); -		data_page_index = data_ptr / PAGE_SIZE - first_data_page; -		data_page_offset = offset_in_page(data_ptr);  		page_length = remain;  		if ((shmem_page_offset + page_length) > PAGE_SIZE)  			page_length = PAGE_SIZE - shmem_page_offset; -		if ((data_page_offset + page_length) > PAGE_SIZE) -			page_length = PAGE_SIZE - data_page_offset;  		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);  		if (IS_ERR(page)) { @@ -913,34 +828,45 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,  			goto out;  		} -		if (do_bit17_swizzling) { -			slow_shmem_bit17_copy(page, -					      shmem_page_offset, -					      user_pages[data_page_index], -					      data_page_offset, -					      page_length, -					      0); -		} else { -			slow_shmem_copy(page, -					shmem_page_offset, -					user_pages[data_page_index], -					data_page_offset, -					page_length); -		} +		page_do_bit17_swizzling = obj_do_bit17_swizzling && +			(page_to_phys(page) & (1 << 17)) != 0; + +		vaddr = kmap(page); +		if (page_do_bit17_swizzling) +			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, +							user_data, +							page_length); +		else +			ret = __copy_from_user(vaddr + shmem_page_offset, +					       user_data, +					       page_length); +		kunmap(page);  		set_page_dirty(page);  		mark_page_accessed(page);  		page_cache_release(page); +		if (ret) { +			ret = -EFAULT; +			goto out; +		} +  		remain -= page_length; -		data_ptr += page_length; +		user_data += page_length;  		offset += page_length;  	}  out: -	for (i = 0; i < pinned_pages; i++) -		page_cache_release(user_pages[i]); -	drm_free_large(user_pages); +	mutex_lock(&dev->struct_mutex); +	/* Fixup: Kill any reinstated backing storage pages */ +	if (obj->madv == __I915_MADV_PURGED) +		i915_gem_object_truncate(obj); +	/* and flush dirty cachelines in case the object isn't in the cpu write +	 * domain anymore. */ +	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { +		i915_gem_clflush_object(obj); +		intel_gtt_chipset_flush(); +	}  	return ret;  } @@ -996,10 +922,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,  	 * pread/pwrite currently are reading and writing from the CPU  	 * perspective, requiring manual detiling by the client.  	 
*/ -	if (obj->phys_obj) +	if (obj->phys_obj) {  		ret = i915_gem_phys_pwrite(dev, obj, args, file); -	else if (obj->gtt_space && -		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { +		goto out; +	} + +	if (obj->gtt_space && +	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {  		ret = i915_gem_object_pin(obj, 0, true);  		if (ret)  			goto out; @@ -1018,18 +947,24 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,  out_unpin:  		i915_gem_object_unpin(obj); -	} else { -		ret = i915_gem_object_set_to_cpu_domain(obj, 1); -		if (ret) -			goto out; -		ret = -EFAULT; -		if (!i915_gem_object_needs_bit17_swizzle(obj)) -			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); -		if (ret == -EFAULT) -			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); +		if (ret != -EFAULT) +			goto out; +		/* Fall through to the shmfs paths because the gtt paths might +		 * fail with non-page-backed user pointers (e.g. gtt mappings +		 * when moving data between textures). */  	} +	ret = i915_gem_object_set_to_cpu_domain(obj, 1); +	if (ret) +		goto out; + +	ret = -EFAULT; +	if (!i915_gem_object_needs_bit17_swizzle(obj)) +		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); +	if (ret == -EFAULT) +		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); +  out:  	drm_gem_object_unreference(&obj->base);  unlock: @@ -1141,7 +1076,6 @@ int  i915_gem_mmap_ioctl(struct drm_device *dev, void *data,  		    struct drm_file *file)  { -	struct drm_i915_private *dev_priv = dev->dev_private;  	struct drm_i915_gem_mmap *args = data;  	struct drm_gem_object *obj;  	unsigned long addr; @@ -1153,11 +1087,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,  	if (obj == NULL)  		return -ENOENT; -	if (obj->size > dev_priv->mm.gtt_mappable_end) { -		drm_gem_object_unreference_unlocked(obj); -		return -E2BIG; -	} -  	down_write(¤t->mm->mmap_sem);  	addr = do_mmap(obj->filp, 0, args->size,  		       PROT_READ | PROT_WRITE, MAP_SHARED, @@ -1647,6 +1576,28 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring,  	}  } +static u32 +i915_gem_get_seqno(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	u32 seqno = dev_priv->next_seqno; + +	/* reserve 0 for non-seqno */ +	if (++dev_priv->next_seqno == 0) +		dev_priv->next_seqno = 1; + +	return seqno; +} + +u32 +i915_gem_next_request_seqno(struct intel_ring_buffer *ring) +{ +	if (ring->outstanding_lazy_request == 0) +		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); + +	return ring->outstanding_lazy_request; +} +  int  i915_add_request(struct intel_ring_buffer *ring,  		 struct drm_file *file, @@ -1654,10 +1605,19 @@ i915_add_request(struct intel_ring_buffer *ring,  {  	drm_i915_private_t *dev_priv = ring->dev->dev_private;  	uint32_t seqno; +	u32 request_ring_position;  	int was_empty;  	int ret;  	BUG_ON(request == NULL); +	seqno = i915_gem_next_request_seqno(ring); + +	/* Record the position of the start of the request so that +	 * should we detect the updated seqno part-way through the +	 * GPU processing the request, we never over-estimate the +	 * position of the head. 
+	 */ +	request_ring_position = intel_ring_get_tail(ring);  	ret = ring->add_request(ring, &seqno);  	if (ret) @@ -1667,6 +1627,7 @@ i915_add_request(struct intel_ring_buffer *ring,  	request->seqno = seqno;  	request->ring = ring; +	request->tail = request_ring_position;  	request->emitted_jiffies = jiffies;  	was_empty = list_empty(&ring->request_list);  	list_add_tail(&request->list, &ring->request_list); @@ -1681,7 +1642,7 @@ i915_add_request(struct intel_ring_buffer *ring,  		spin_unlock(&file_priv->mm.lock);  	} -	ring->outstanding_lazy_request = false; +	ring->outstanding_lazy_request = 0;  	if (!dev_priv->mm.suspended) {  		if (i915_enable_hangcheck) { @@ -1803,7 +1764,7 @@ void i915_gem_reset(struct drm_device *dev)  /**   * This function clears the request list as sequence numbers are passed.   */ -static void +void  i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)  {  	uint32_t seqno; @@ -1831,6 +1792,12 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)  			break;  		trace_i915_gem_request_retire(ring, request->seqno); +		/* We know the GPU must have read the request to have +		 * sent us the seqno + interrupt, so use the position +		 * of tail of the request to update the last known position +		 * of the GPU head. +		 */ +		ring->last_retired_head = request->tail;  		list_del(&request->list);  		i915_gem_request_remove_from_client(request); @@ -1943,7 +1910,8 @@ i915_gem_retire_work_handler(struct work_struct *work)   */  int  i915_wait_request(struct intel_ring_buffer *ring, -		  uint32_t seqno) +		  uint32_t seqno, +		  bool do_retire)  {  	drm_i915_private_t *dev_priv = ring->dev->dev_private;  	u32 ier; @@ -2017,17 +1985,12 @@ i915_wait_request(struct intel_ring_buffer *ring,  	if (atomic_read(&dev_priv->mm.wedged))  		ret = -EAGAIN; -	if (ret && ret != -ERESTARTSYS) -		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", -			  __func__, ret, seqno, ring->get_seqno(ring), -			  dev_priv->next_seqno); -  	/* Directly dispatch request retiring.  While we have the work queue  	 * to handle this, the waiter on a request often wants an associated  	 * buffer to have made it to the inactive list, and we would need  	 * a separate wait queue to handle that.  	 */ -	if (ret == 0) +	if (ret == 0 && do_retire)  		i915_gem_retire_requests_ring(ring);  	return ret; @@ -2051,7 +2014,8 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)  	 * it.  	 
*/  	if (obj->active) { -		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); +		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno, +					true);  		if (ret)  			return ret;  	} @@ -2089,6 +2053,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)  int  i915_gem_object_unbind(struct drm_i915_gem_object *obj)  { +	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;  	int ret = 0;  	if (obj->gtt_space == NULL) @@ -2133,6 +2098,11 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)  	trace_i915_gem_object_unbind(obj);  	i915_gem_gtt_unbind_object(obj); +	if (obj->has_aliasing_ppgtt_mapping) { +		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); +		obj->has_aliasing_ppgtt_mapping = 0; +	} +  	i915_gem_object_put_pages_gtt(obj);  	list_del_init(&obj->gtt_list); @@ -2172,7 +2142,7 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,  	return 0;  } -static int i915_ring_idle(struct intel_ring_buffer *ring) +static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)  {  	int ret; @@ -2186,18 +2156,18 @@ static int i915_ring_idle(struct intel_ring_buffer *ring)  			return ret;  	} -	return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); +	return i915_wait_request(ring, i915_gem_next_request_seqno(ring), +				 do_retire);  } -int -i915_gpu_idle(struct drm_device *dev) +int i915_gpu_idle(struct drm_device *dev, bool do_retire)  {  	drm_i915_private_t *dev_priv = dev->dev_private;  	int ret, i;  	/* Flush everything onto the inactive list. */  	for (i = 0; i < I915_NUM_RINGS; i++) { -		ret = i915_ring_idle(&dev_priv->ring[i]); +		ret = i915_ring_idle(&dev_priv->ring[i], do_retire);  		if (ret)  			return ret;  	} @@ -2400,7 +2370,8 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,  		if (!ring_passed_seqno(obj->last_fenced_ring,  				       obj->last_fenced_seqno)) {  			ret = i915_wait_request(obj->last_fenced_ring, -						obj->last_fenced_seqno); +						obj->last_fenced_seqno, +						true);  			if (ret)  				return ret;  		} @@ -2432,6 +2403,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)  	if (obj->fence_reg != I915_FENCE_REG_NONE) {  		struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + +		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);  		i915_gem_clear_fence_reg(obj->base.dev,  					 &dev_priv->fence_regs[obj->fence_reg]); @@ -2456,7 +2429,7 @@ i915_find_fence_reg(struct drm_device *dev,  		if (!reg->obj)  			return reg; -		if (!reg->obj->pin_count) +		if (!reg->pin_count)  			avail = reg;  	} @@ -2466,7 +2439,7 @@ i915_find_fence_reg(struct drm_device *dev,  	/* None available, try to steal one or wait for a user to finish */  	avail = first = NULL;  	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { -		if (reg->obj->pin_count) +		if (reg->pin_count)  			continue;  		if (first == NULL) @@ -2541,7 +2514,8 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,  				if (!ring_passed_seqno(obj->last_fenced_ring,  						       reg->setup_seqno)) {  					ret = i915_wait_request(obj->last_fenced_ring, -								reg->setup_seqno); +								reg->setup_seqno, +								true);  					if (ret)  						return ret;  				} @@ -2560,7 +2534,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,  	reg = i915_find_fence_reg(dev, pipelined);  	if (reg == NULL) -		return -ENOSPC; +		return -EDEADLK;  	ret = i915_gem_object_flush_fence(obj, pipelined);  	if (ret) @@ -2660,6 +2634,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev,  	
list_del_init(®->lru_list);  	reg->obj = NULL;  	reg->setup_seqno = 0; +	reg->pin_count = 0;  }  /** @@ -2946,6 +2921,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,  				    enum i915_cache_level cache_level)  { +	struct drm_device *dev = obj->base.dev; +	drm_i915_private_t *dev_priv = dev->dev_private;  	int ret;  	if (obj->cache_level == cache_level) @@ -2974,6 +2951,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,  		}  		i915_gem_gtt_rebind_object(obj, cache_level); +		if (obj->has_aliasing_ppgtt_mapping) +			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, +					       obj, cache_level);  	}  	if (cache_level == I915_CACHE_NONE) { @@ -3084,10 +3064,13 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)  			return ret;  	} +	ret = i915_gem_object_wait_rendering(obj); +	if (ret) +		return ret; +  	/* Ensure that we invalidate the GPU's caches and TLBs. */  	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; - -	return i915_gem_object_wait_rendering(obj); +	return 0;  }  /** @@ -3619,8 +3602,8 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,  	obj->base.write_domain = I915_GEM_DOMAIN_CPU;  	obj->base.read_domains = I915_GEM_DOMAIN_CPU; -	if (IS_GEN6(dev) || IS_GEN7(dev)) { -		/* On Gen6, we can have the GPU use the LLC (the CPU +	if (HAS_LLC(dev)) { +		/* On some devices, we can have the GPU use the LLC (the CPU  		 * cache) for about a 10% performance improvement  		 * compared to uncached.  Graphics requests other than  		 * display scanout are coherent with the CPU in @@ -3710,7 +3693,7 @@ i915_gem_idle(struct drm_device *dev)  		return 0;  	} -	ret = i915_gpu_idle(dev); +	ret = i915_gpu_idle(dev, true);  	if (ret) {  		mutex_unlock(&dev->struct_mutex);  		return ret; @@ -3745,12 +3728,71 @@ i915_gem_idle(struct drm_device *dev)  	return 0;  } +void i915_gem_init_swizzling(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; + +	if (INTEL_INFO(dev)->gen < 5 || +	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) +		return; + +	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | +				 DISP_TILE_SURFACE_SWIZZLING); + +	if (IS_GEN5(dev)) +		return; + +	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); +	if (IS_GEN6(dev)) +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); +	else +		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); +} + +void i915_gem_init_ppgtt(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	uint32_t pd_offset; +	struct intel_ring_buffer *ring; +	int i; + +	if (!dev_priv->mm.aliasing_ppgtt) +		return; + +	pd_offset = dev_priv->mm.aliasing_ppgtt->pd_offset; +	pd_offset /= 64; /* in cachelines, */ +	pd_offset <<= 16; + +	if (INTEL_INFO(dev)->gen == 6) { +		uint32_t ecochk = I915_READ(GAM_ECOCHK); +		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | +				       ECOCHK_PPGTT_CACHE64B); +		I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); +	} else if (INTEL_INFO(dev)->gen >= 7) { +		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); +		/* GFX_MODE is per-ring on gen7+ */ +	} + +	for (i = 0; i < I915_NUM_RINGS; i++) { +		ring = &dev_priv->ring[i]; + +		if (INTEL_INFO(dev)->gen >= 7) +			I915_WRITE(RING_MODE_GEN7(ring), +				   GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); + +		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); +		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); +	} +} +  int 
-i915_gem_init_ringbuffer(struct drm_device *dev) +i915_gem_init_hw(struct drm_device *dev)  {  	drm_i915_private_t *dev_priv = dev->dev_private;  	int ret; +	i915_gem_init_swizzling(dev); +  	ret = intel_init_render_ring_buffer(dev);  	if (ret)  		return ret; @@ -3769,6 +3811,8 @@ i915_gem_init_ringbuffer(struct drm_device *dev)  	dev_priv->next_seqno = 1; +	i915_gem_init_ppgtt(dev); +  	return 0;  cleanup_bsd_ring: @@ -3806,7 +3850,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,  	mutex_lock(&dev->struct_mutex);  	dev_priv->mm.suspended = 0; -	ret = i915_gem_init_ringbuffer(dev); +	ret = i915_gem_init_hw(dev);  	if (ret != 0) {  		mutex_unlock(&dev->struct_mutex);  		return ret; @@ -4201,7 +4245,7 @@ rescan:  		 * This has a dramatic impact to reduce the number of  		 * OOM-killer events whilst running the GPU aggressively.  		 */ -		if (i915_gpu_idle(dev) == 0) +		if (i915_gpu_idle(dev, true) == 0)  			goto rescan;  	}  	mutex_unlock(&dev->struct_mutex);  |
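
A few notes on the main changes above, with condensed, illustrative C sketches (not the driver's code).

The largest functional change replaces the kmap-pair helpers slow_shmem_copy()/slow_shmem_bit17_copy() with __copy_to_user_swizzled()/__copy_from_user_swizzled(), which copy directly between the kmap'd shmem page and the user buffer. The swizzle handling is unchanged in spirit: only pages whose physical address has bit 17 set are affected, and within such a page the copy walks one cacheline at a time with bit 6 of the offset flipped. A minimal userspace-style sketch of that addressing (memcpy stands in for __copy_to_user; names are illustrative):

#include <stdint.h>
#include <string.h>

#define CACHELINE	64
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Does this page need bit-17 swizzling at all?  Mirrors the
 * (page_to_phys(page) & (1 << 17)) test used in the diff. */
static int page_does_bit17_swizzle(uint64_t page_phys)
{
	return (page_phys & (1ull << 17)) != 0;
}

/* Cacheline-walking copy out of a swizzled page, the pattern used by the
 * new __copy_to_user_swizzled(): each chunk stops at the next 64-byte
 * boundary, and the source offset has bit 6 flipped (A6 xor A17, with A17
 * known to be set for this page). */
static void copy_out_swizzled(char *dst, const char *page_vaddr,
			      int gpu_offset, int length)
{
	int dst_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN_UP(gpu_offset + 1, CACHELINE);
		int this_length = cacheline_end - gpu_offset;

		if (this_length > length)
			this_length = length;

		memcpy(dst + dst_offset,
		       page_vaddr + (gpu_offset ^ 64),
		       this_length);

		dst_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}
}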
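With get_user_pages() gone, the rewritten shmem pread/pwrite slow paths simply loop over the object page by page: drop struct_mutex, look the page up with shmem_read_mapping_page(), kmap() it, and copy to or from the user pointer directly, clamping each chunk only to the shmem page boundary (the old code also had to clamp against the pinned user page). A sketch of that clamping arithmetic, assuming 4 KiB pages:

#include <stddef.h>

#define PAGE_SZ	4096UL	/* assumption: 4 KiB pages */

/* How many bytes of the remaining request fit in the page containing
 * 'offset'?  This is the page_length computation in the reworked
 * i915_gem_shmem_pread_slow()/pwrite_slow() loops. */
static size_t bytes_this_page(size_t offset, size_t remain)
{
	size_t page_off = offset & (PAGE_SZ - 1);	/* offset_in_page() */
	size_t space = PAGE_SZ - page_off;

	return remain < space ? remain : space;
}

Because the copy now runs without struct_mutex held, both paths re-take the lock afterwards and, if the object was purged in the meantime (obj->madv == __I915_MADV_PURGED), truncate any backing pages the copy reinstated; the write path additionally clflushes and flushes the chipset when the object is no longer in the CPU write domain.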
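i915_gem_pwrite_ioctl() is restructured so the GTT path no longer terminates the function: if it returns -EFAULT (for example because the source pointer is itself a GTT mapping and thus not page-backed), control now falls through to the shmem paths. The resulting shape, with hypothetical callbacks standing in for the driver's backends:

#include <errno.h>

/* Dispatch shape of the reworked i915_gem_pwrite_ioctl().  The callbacks
 * are stand-ins for the GTT and shmem pwrite backends. */
struct pwrite_ops {
	int (*gtt_fast)(void);
	int (*shmem_fast)(void);
	int (*shmem_slow)(void);
};

static int pwrite_dispatch(const struct pwrite_ops *ops,
			   int use_gtt, int needs_bit17_swizzle)
{
	int ret = -EFAULT;

	if (use_gtt) {
		ret = ops->gtt_fast();
		if (ret != -EFAULT)
			return ret;	/* success, or an error other than a fault */
		/* fall through: the user pointer may not be page-backed
		 * (e.g. a GTT mmap), so retry via shmem */
	}

	if (!needs_bit17_swizzle)
		ret = ops->shmem_fast();
	if (ret == -EFAULT)
		ret = ops->shmem_slow();

	return ret;
}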
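Seqno handling moves into this file: i915_gem_get_seqno() hands out monotonically increasing values while reserving 0 to mean "no seqno", and i915_gem_next_request_seqno() reuses the ring's outstanding lazy seqno if one is already reserved (outstanding_lazy_request accordingly changes from a bool to the seqno value itself). Condensed:

#include <stdint.h>

struct seqno_state {			/* condensed stand-in for the driver state */
	uint32_t next_seqno;
	uint32_t outstanding_lazy_request;
};

static uint32_t get_seqno(struct seqno_state *s)
{
	uint32_t seqno = s->next_seqno;

	if (++s->next_seqno == 0)	/* reserve 0 for "non-seqno" */
		s->next_seqno = 1;

	return seqno;
}

static uint32_t next_request_seqno(struct seqno_state *s)
{
	if (s->outstanding_lazy_request == 0)
		s->outstanding_lazy_request = get_seqno(s);

	return s->outstanding_lazy_request;
}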
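Each request now records where in the ring it starts (request->tail, captured with intel_ring_get_tail() before the breadcrumb is emitted), and request retirement copies that value into ring->last_retired_head. As the comments in the diff explain, this gives a head position that never over-estimates how far the GPU has actually read; the ring-buffer code (outside this file) can use it as a conservative basis for free-space accounting. A generic illustration of why a conservative head is safe, assuming a power-of-two ring size:

#include <stdint.h>

/* Free space between the CPU's write position (tail) and a known-consumed
 * GPU position (head) in a power-of-two-sized ring.  Using the tail of the
 * last *retired* request as 'head' under-estimates the true hardware head,
 * so this only ever errs on the side of reporting less space.  (The real
 * accounting lives in intel_ringbuffer.c and also reserves a small gap so
 * tail never catches up with head exactly.) */
static uint32_t ring_free_space(uint32_t head, uint32_t tail, uint32_t size)
{
	return (head - tail - 1) & (size - 1);
}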
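Fence pinning moves from the object (reg->obj->pin_count) onto the fence register itself (reg->pin_count), which i915_gem_clear_fence_reg() now resets and i915_gem_object_put_fence() WARNs about if still held; when no register can be found at all, the caller now sees -EDEADLK instead of -ENOSPC. A condensed sketch of the first scan in i915_find_fence_reg() (the real function then picks a victim in LRU order; the struct is a stand-in):

#include <stddef.h>

struct fence_reg_sketch {	/* stand-in for struct drm_i915_fence_reg */
	void *obj;		/* object currently using the fence, if any */
	int pin_count;		/* now tracked on the register itself */
};

/* Prefer a register with no object; failing that, remember an in-use
 * register that is not pinned as a steal candidate.  NULL means every
 * register is pinned. */
static struct fence_reg_sketch *
find_fence_reg(struct fence_reg_sketch *regs, size_t count)
{
	struct fence_reg_sketch *avail = NULL;
	size_t i;

	for (i = 0; i < count; i++) {
		if (!regs[i].obj)
			return &regs[i];
		if (!regs[i].pin_count)
			avail = &regs[i];
	}

	return avail;
}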
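Finally, the new i915_gem_init_ppgtt() programs each ring with the aliasing PPGTT's page-directory location: the byte offset is converted to 64-byte cachelines and shifted into the upper half of the register before being written to RING_PP_DIR_BASE (alongside RING_PP_DIR_DCLV = PP_DIR_DCLV_2G and, on gen7, the per-ring GFX_MODE PPGTT enable). The encoding itself is simple:

#include <stdint.h>

/* Value written to RING_PP_DIR_BASE in i915_gem_init_ppgtt(): the page
 * directory offset expressed in 64-byte cachelines, placed in the upper
 * half of the register. */
static uint32_t pp_dir_base_value(uint32_t pd_offset_bytes)
{
	uint32_t pd = pd_offset_bytes / 64;	/* bytes -> cachelines */

	return pd << 16;
}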