Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c	391
1 files changed, 82 insertions, 309 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8dd9a6f47db..3eea143749f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,180 +33,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-	uint32_t invalidate_domains;
-	uint32_t flush_domains;
-	uint32_t flush_rings;
-	uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Mapped to GTT
- *	4. Read by GPU
- *	5. Unmapped from GTT
- *	6. Freed
- *
- *	Let's take these a step at a time
- *
- *	1. Allocated
- *		Pages allocated from the kernel may still have
- *		cache contents, so we set them to (CPU, CPU) always.
- *	2. Written by CPU (using pwrite)
- *		The pwrite function calls set_domain (CPU, CPU) and
- *		this function does nothing (as nothing changes)
- *	3. Mapped by GTT
- *		This function asserts that the object is not
- *		currently in any GPU-based read or write domains
- *	4. Read by GPU
- *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *		As write_domain is zero, this function adds in the
- *		current read domains (CPU+COMMAND, 0).
- *		flush_domains is set to CPU.
- *		invalidate_domains is set to COMMAND
- *		clflush is run to get data out of the CPU caches
- *		then i915_dev_set_domain calls i915_gem_flush to
- *		emit an MI_FLUSH and drm_agp_chipset_flush
- *	5. Unmapped from GTT
- *		i915_gem_object_unbind calls set_domain (CPU, CPU)
- *		flush_domains and invalidate_domains end up both zero
- *		so no flushing/invalidating happens
- *	6. Freed
- *		yay, done
- *
- * Case 2: The shared render buffer
- *
- *	1. Allocated
- *	2. Mapped to GTT
- *	3. Read/written by GPU
- *	4. set_domain to (CPU,CPU)
- *	5. Read/written by CPU
- *	6. Read/written by GPU
- *
- *	1. Allocated
- *		Same as last example, (CPU, CPU)
- *	2. Mapped to GTT
- *		Nothing changes (assertions find that it is not in the GPU)
- *	3. Read/written by GPU
- *		execbuffer calls set_domain (RENDER, RENDER)
- *		flush_domains gets CPU
- *		invalidate_domains gets GPU
- *		clflush (obj)
- *		MI_FLUSH and drm_agp_chipset_flush
- *	4. set_domain (CPU, CPU)
- *		flush_domains gets GPU
- *		invalidate_domains gets CPU
- *		wait_rendering (obj) to make sure all drawing is complete.
- *		This will include an MI_FLUSH to get the data from GPU
- *		to memory
- *		clflush (obj) to invalidate the CPU cache
- *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *	5. Read/written by CPU
- *		cache lines are loaded and dirtied
- *	6. Read written by GPU
- *		Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Read by GPU
- *	4. Updated (written) by CPU again
- *	5. Read by GPU
- *
- *	1. Allocated
- *		(CPU, CPU)
- *	2. Written by CPU
- *		(CPU, CPU)
- *	3. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- *	4. Updated (written) by CPU again
- *		(CPU, CPU)
- *		flush_domains = 0 (no previous write domain)
- *		invalidate_domains = 0 (no new read domains)
- *	5. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-				  struct intel_ring_buffer *ring,
-				  struct change_domains *cd)
-{
-	uint32_t invalidate_domains = 0, flush_domains = 0;
-
-	/*
-	 * If the object isn't moving to a new write domain,
-	 * let the object stay in multiple read domains
-	 */
-	if (obj->base.pending_write_domain == 0)
-		obj->base.pending_read_domains |= obj->base.read_domains;
-
-	/*
-	 * Flush the current write domain if
-	 * the new read domains don't match. Invalidate
-	 * any read domains which differ from the old
-	 * write domain
-	 */
-	if (obj->base.write_domain &&
-	    (((obj->base.write_domain != obj->base.pending_read_domains ||
-	       obj->ring != ring)) ||
-	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-		flush_domains |= obj->base.write_domain;
-		invalidate_domains |=
-			obj->base.pending_read_domains & ~obj->base.write_domain;
-	}
-	/*
-	 * Invalidate any read caches which may have
-	 * stale data. That is, any new read domains.
-	 */
-	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-		i915_gem_clflush_object(obj);
-
-	if (obj->base.pending_write_domain)
-		cd->flips |= atomic_read(&obj->pending_flip);
-
-	/* The actual obj->write_domain will be updated with
-	 * pending_write_domain after we emit the accumulated flush for all
-	 * of our domain changes in execbuffers (which clears objects'
-	 * write_domains).  So if we have a current write domain that we
-	 * aren't changing, set pending_write_domain to that.
-	 */
-	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-		obj->base.pending_write_domain = obj->base.write_domain;
-
-	cd->invalidate_domains |= invalidate_domains;
-	cd->flush_domains |= flush_domains;
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(obj->ring);
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -217,6 +43,7 @@ eb_create(int size)
 {
 	struct eb_objects *eb;
 	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+	BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
 	while (count > size)
 		count >>= 1;
 	eb = kzalloc(count*sizeof(struct hlist_head) +
@@ -268,6 +95,7 @@ eb_destroy(struct eb_objects *eb)
 
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
@@ -382,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		if (ret)
 			return ret;
 
-		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
+		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+							     reloc->offset >> PAGE_SHIFT));
 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
 		kunmap_atomic(vaddr);
 	} else {
@@ -503,7 +332,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
-#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+#define  __EXEC_OBJECT_HAS_PIN (1<<31)
+#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 
 static int
 need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -513,9 +343,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
 }
 
 static int
-pin_and_fence_object(struct drm_i915_gem_object *obj,
-		     struct intel_ring_buffer *ring)
+i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
+				   struct intel_ring_buffer *ring)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	bool need_fence, need_mappable;
@@ -527,15 +358,17 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 		obj->tiling_mode != I915_TILING_NONE;
 	need_mappable = need_fence || need_reloc_mappable(obj);
 
-	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
 	if (ret)
 		return ret;
 
+	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 			ret = i915_gem_object_get_fence(obj);
 			if (ret)
-				goto err_unpin;
+				return ret;
 
 			if (i915_gem_object_pin_fence(obj))
 				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -544,12 +377,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
+	/* Ensure ppgtt mapping exists if needed */
+	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+				       obj, obj->cache_level);
+
+		obj->has_aliasing_ppgtt_mapping = 1;
+	}
+
 	entry->offset = obj->gtt_offset;
 	return 0;
+}
 
-err_unpin:
-	i915_gem_object_unpin(obj);
-	return ret;
+static void
+i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_gem_exec_object2 *entry;
+
+	if (!obj->gtt_space)
+		return;
+
+	entry = obj->exec_entry;
+
+	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+		i915_gem_object_unpin_fence(obj);
+
+	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+		i915_gem_object_unpin(obj);
+
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static int
@@ -557,11 +413,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
 			    struct list_head *objects)
 {
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	int ret, retry;
-	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	struct list_head ordered_objects;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	int retry;
 
 	INIT_LIST_HEAD(&ordered_objects);
 	while (!list_empty(objects)) {
@@ -586,6 +441,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		obj->base.pending_read_domains = 0;
 		obj->base.pending_write_domain = 0;
+		obj->pending_fenced_gpu_access = false;
 	}
 	list_splice(&ordered_objects, objects);
 
@@ -598,12 +454,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	 * 2.  Bind new objects.
 	 * 3.  Decrement pin count.
 	 *
-	 * This avoid unnecessary unbinding of later objects in order to makr
+	 * This avoid unnecessary unbinding of later objects in order to make
 	 * room for the earlier objects *unless* we need to defragment.
 	 */
 	retry = 0;
 	do {
-		ret = 0;
+		int ret = 0;
 
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(obj, objects, exec_list) {
@@ -623,7 +479,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = pin_and_fence_object(obj, ring);
+				ret = i915_gem_execbuffer_reserve_object(obj, ring);
 			if (ret)
 				goto err;
 		}
@@ -633,77 +489,22 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			if (obj->gtt_space)
 				continue;
 
-			ret = pin_and_fence_object(obj, ring);
-			if (ret) {
-				int ret_ignore;
-
-				/* This can potentially raise a harmless
-				 * -EINVAL if we failed to bind in the above
-				 * call. It cannot raise -EINTR since we know
-				 * that the bo is freshly bound and so will
-				 * not need to be flushed or waited upon.
-				 */
-				ret_ignore = i915_gem_object_unbind(obj);
-				(void)ret_ignore;
-				WARN_ON(obj->gtt_space);
-				break;
-			}
+			ret = i915_gem_execbuffer_reserve_object(obj, ring);
+			if (ret)
+				goto err;
 		}
 
-		/* Decrement pin count for bound objects */
-		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry;
-
-			if (!obj->gtt_space)
-				continue;
-
-			entry = obj->exec_entry;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				i915_gem_object_unpin_fence(obj);
-				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-			}
-
-			i915_gem_object_unpin(obj);
-
-			/* ... and ensure ppgtt mapping exist if needed. */
-			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-						       obj, obj->cache_level);
-
-				obj->has_aliasing_ppgtt_mapping = 1;
-			}
-		}
+err:		/* Decrement pin count for bound objects */
+		list_for_each_entry(obj, objects, exec_list)
+			i915_gem_execbuffer_unreserve_object(obj);
 
-		if (ret != -ENOSPC || retry > 1)
+		if (ret != -ENOSPC || retry++)
 			return ret;
 
-		/* First attempt, just clear anything that is purgeable.
-		 * Second attempt, clear the entire GTT.
-		 */
-		ret = i915_gem_evict_everything(ring->dev, retry == 0);
+		ret = i915_gem_evict_everything(ring->dev);
 		if (ret)
 			return ret;
-
-		retry++;
 	} while (1);
-
-err:
-	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
-		struct drm_i915_gem_exec_object2 *entry;
-
-		if (!obj->gtt_space)
-			continue;
-
-		entry = obj->exec_entry;
-		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-			i915_gem_object_unpin_fence(obj);
-			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-		}
-
-		i915_gem_object_unpin(obj);
-	}
-
-	return ret;
 }
 
 static int
@@ -809,18 +610,6 @@ err:
 	return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-			  uint32_t invalidate_domains,
-			  uint32_t flush_domains)
-{
-	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		intel_gtt_chipset_flush();
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -853,48 +642,45 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 	return 0;
 }
-
 
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct change_domains cd;
+	uint32_t flush_domains = 0;
+	uint32_t flips = 0;
 	int ret;
 
-	memset(&cd, 0, sizeof(cd));
-	list_for_each_entry(obj, objects, exec_list)
-		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-	if (cd.invalidate_domains | cd.flush_domains) {
-		i915_gem_execbuffer_flush(ring->dev,
-					  cd.invalidate_domains,
-					  cd.flush_domains);
-	}
-
-	if (cd.flips) {
-		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+	list_for_each_entry(obj, objects, exec_list) {
+		ret = i915_gem_object_sync(obj, ring);
 		if (ret)
 			return ret;
+
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+			i915_gem_clflush_object(obj);
+
+		if (obj->base.pending_write_domain)
+			flips |= atomic_read(&obj->pending_flip);
+
+		flush_domains |= obj->base.write_domain;
 	}
 
-	list_for_each_entry(obj, objects, exec_list) {
-		ret = i915_gem_object_sync(obj, ring);
+	if (flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
 		if (ret)
 			return ret;
 	}
 
+	if (flush_domains & I915_GEM_DOMAIN_CPU)
+		intel_gtt_chipset_flush();
+
+	if (flush_domains & I915_GEM_DOMAIN_GTT)
+		wmb();
+
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	ret = i915_gem_flush_ring(ring,
-				  I915_GEM_GPU_DOMAINS,
-				  ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
-	if (ret)
-		return ret;
-
-	ring->gpu_caches_dirty = false;
-	return 0;
+	return intel_ring_invalidate_all_caches(ring);
 }
 
 static bool
@@ -942,9 +728,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 	struct drm_i915_gem_object *obj;
 
 	list_for_each_entry(obj, objects, exec_list) {
-		  u32 old_read = obj->base.read_domains;
-		  u32 old_write = obj->base.write_domain;
-
+		u32 old_read = obj->base.read_domains;
+		u32 old_write = obj->base.write_domain;
 
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->base.write_domain = obj->base.pending_write_domain;
@@ -953,17 +738,13 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 		i915_gem_object_move_to_active(obj, ring, seqno);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
-			obj->pending_gpu_write = true;
-			list_move_tail(&obj->gpu_write_list,
-				       &ring->gpu_write_list);
+			obj->last_write_seqno = seqno;
 			if (obj->pin_count) /* check for potential scanout */
-				intel_mark_busy(ring->dev, obj);
+				intel_mark_fb_busy(obj);
 		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
-
-	intel_mark_busy(ring->dev, NULL);
 }
 
 static void
@@ -971,16 +752,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
 				    struct intel_ring_buffer *ring)
 {
-	struct drm_i915_gem_request *request;
-
 	/* Unconditionally force add_request to emit a full flush. */
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL || i915_add_request(ring, file, request)) {
-		kfree(request);
-	}
+	(void)i915_add_request(ring, file, NULL);
 }
 
 static int
@@ -1326,8 +1102,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 	ret = copy_from_user(exec_list,
-			     (struct drm_i915_relocation_entry __user *)
-			     (uintptr_t) args->buffers_ptr,
+			     (void __user *)(uintptr_t)args->buffers_ptr,
 			     sizeof(*exec_list) * args->buffer_count);
 	if (ret != 0) {
 		DRM_DEBUG("copy %d exec entries failed %d\n",
@@ -1366,8 +1141,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		for (i = 0; i < args->buffer_count; i++)
 			exec_list[i].offset = exec2_list[i].offset;
 		/* ... and back out to userspace */
-		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
-				   (uintptr_t) args->buffers_ptr,
+		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
 				   exec_list,
 				   sizeof(*exec_list) * args->buffer_count);
 		if (ret) {
@@ -1421,8 +1195,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
-		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
-				   (uintptr_t) args->buffers_ptr,
+		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
 				   exec2_list,
 				   sizeof(*exec2_list) * args->buffer_count);
 		if (ret) {
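The structural change to i915_gem_execbuffer_reserve() above is easiest to see in isolation: every object is pinned via i915_gem_execbuffer_reserve_object(), the temporary pins are always dropped again by i915_gem_execbuffer_unreserve_object(), and a single -ENOSPC triggers i915_gem_evict_everything() followed by exactly one retry. The user-space C sketch below models only that control flow; the struct, helper names, and the "evicted" flag are hypothetical stand-ins for illustration and are not the i915 API.

/*
 * Minimal model of the reserve/unreserve retry loop from the patch above.
 * fake_obj, reserve_object(), unreserve_object() and reserve_all() are
 * invented for this sketch; the real code walks drm_i915_gem_object lists
 * and calls i915_gem_execbuffer_reserve_object(), _unreserve_object() and
 * i915_gem_evict_everything().
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_obj {
	bool bound;	/* models obj->gtt_space */
	bool pinned;	/* models __EXEC_OBJECT_HAS_PIN */
};

/* Pretend to pin one object; fail with -ENOSPC until eviction has run. */
static int reserve_object(struct fake_obj *obj, bool evicted)
{
	if (!obj->bound && !evicted)
		return -ENOSPC;
	obj->bound = true;
	obj->pinned = true;
	return 0;
}

/* Drop the temporary pin taken during reservation; the object stays bound. */
static void unreserve_object(struct fake_obj *obj)
{
	obj->pinned = false;
}

static int reserve_all(struct fake_obj *objs, int n)
{
	int retry = 0;

	do {
		int ret = 0, i;

		/* Try to reserve everything; stop at the first failure. */
		for (i = 0; i < n; i++) {
			ret = reserve_object(&objs[i], retry > 0);
			if (ret)
				break;
		}

		/* Always unwind the pins, whether we succeeded or not. */
		for (i = 0; i < n; i++)
			unreserve_object(&objs[i]);

		/* Same shape as: if (ret != -ENOSPC || retry++) return ret; */
		if (ret != -ENOSPC || retry++)
			return ret;

		/* The real code calls i915_gem_evict_everything() here. */
		printf("out of space, evicting and retrying once\n");
	} while (1);
}

int main(void)
{
	struct fake_obj objs[2] = { { .bound = true }, { 0 } };

	/* First pass fails on the unbound object, second pass succeeds. */
	return reserve_all(objs, 2) ? 1 : 0;
}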