Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 221
1 file changed, 143 insertions(+), 78 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b189b49c760..c0ae6bbbd9b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
@@ -429,7 +429,7 @@ fail_put_user_pages:
 		SetPageDirty(user_pages[i]);
 		page_cache_release(user_pages[i]);
 	}
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
@@ -719,7 +719,7 @@ out_unlock:
 out_unpin_pages:
 	for (i = 0; i < pinned_pages; i++)
 		page_cache_release(user_pages[i]);
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
@@ -902,7 +902,7 @@ fail_unlock:
 fail_put_user_pages:
 	for (i = 0; i < pinned_pages; i++)
 		page_cache_release(user_pages[i]);
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -989,10 +989,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		return -ENODEV;
 
 	/* Only handle setting domains to types used by the CPU. */
-	if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+	if (write_domain & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
 
-	if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+	if (read_domains & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
 
 	/* Having something in the write domain implies it's in the read
@@ -1145,7 +1145,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			mutex_unlock(&dev->struct_mutex);
 			return VM_FAULT_SIGBUS;
 		}
-		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+		ret = i915_gem_object_set_to_gtt_domain(obj, write);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return VM_FAULT_SIGBUS;
+		}
+
+		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 	}
 
 	/* Need a new fence register? */
@@ -1375,7 +1382,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 			mutex_unlock(&dev->struct_mutex);
 			return ret;
 		}
-		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 	}
 
 	drm_gem_object_unreference(obj);
@@ -1408,9 +1415,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
 		}
 	obj_priv->dirty = 0;
 
-	drm_free(obj_priv->pages,
-		 page_count * sizeof(struct page *),
-		 DRM_MEM_DRIVER);
+	drm_free_large(obj_priv->pages);
 	obj_priv->pages = NULL;
 }
@@ -1476,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
  * Returned sequence numbers are nonzero on success.
  */
 static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+		 uint32_t flush_domains)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_file_private *i915_file_priv = NULL;
 	struct drm_i915_gem_request *request;
 	uint32_t seqno;
 	int was_empty;
 	RING_LOCALS;
 
+	if (file_priv != NULL)
+		i915_file_priv = file_priv->driver_priv;
+
 	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
 	if (request == NULL)
 		return 0;
@@ -1510,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
 	request->emitted_jiffies = jiffies;
 	was_empty = list_empty(&dev_priv->mm.request_list);
 	list_add_tail(&request->list, &dev_priv->mm.request_list);
+	if (i915_file_priv) {
+		list_add_tail(&request->client_list,
+			      &i915_file_priv->mm.request_list);
+	} else {
+		INIT_LIST_HEAD(&request->client_list);
+	}
 
 	/* Associate any objects on the flushing list matching the write
 	 * domain we're flushing with our flush.
@@ -1659,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 			i915_gem_retire_request(dev, request);
 
 			list_del(&request->list);
+			list_del(&request->client_list);
 			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
 		} else
 			break;
@@ -1697,7 +1714,10 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 	BUG_ON(seqno == 0);
 
 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
-		ier = I915_READ(IER);
+		if (IS_IGDNG(dev))
+			ier = I915_READ(DEIER) | I915_READ(GTIER);
+		else
+			ier = I915_READ(IER);
 		if (!ier) {
 			DRM_ERROR("something (likely vbetool) disabled "
 				  "interrupts, re-enabling\n");
@@ -1749,8 +1769,7 @@ i915_gem_flush(struct drm_device *dev,
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		drm_agp_chipset_flush(dev);
 
-	if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
-						     I915_GEM_DOMAIN_GTT)) {
+	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
 		/*
 		 * read/write caches:
 		 *
@@ -1972,7 +1991,7 @@ i915_gem_evict_something(struct drm_device *dev)
 			i915_gem_flush(dev,
 				       obj->write_domain,
 				       obj->write_domain);
-			i915_add_request(dev, obj->write_domain);
+			i915_add_request(dev, NULL, obj->write_domain);
 
 			obj = NULL;
 			continue;
@@ -1986,7 +2005,7 @@ i915_gem_evict_something(struct drm_device *dev)
 		/* If we didn't do any of the above, there's nothing to be done
 		 * and we just can't fit it in.
 		 */
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 	return ret;
 }
@@ -2001,7 +2020,7 @@ i915_gem_evict_everything(struct drm_device *dev)
 		if (ret != 0)
 			break;
 	}
-	if (ret == -ENOMEM)
+	if (ret == -ENOSPC)
 		return 0;
 	return ret;
 }
@@ -2024,8 +2043,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
 	 */
 	page_count = obj->size / PAGE_SIZE;
 	BUG_ON(obj_priv->pages != NULL);
-	obj_priv->pages = drm_calloc(page_count, sizeof(struct page *),
-				     DRM_MEM_DRIVER);
+	obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
 	if (obj_priv->pages == NULL) {
 		DRM_ERROR("Faled to allocate page list\n");
 		obj_priv->pages_refcount--;
@@ -2131,8 +2149,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 		return;
 	}
 
-	pitch_val = (obj_priv->stride / 128) - 1;
-	WARN_ON(pitch_val & ~0x0000000f);
+	pitch_val = obj_priv->stride / 128;
+	pitch_val = ffs(pitch_val) - 1;
+	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
+
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
@@ -2209,7 +2229,7 @@ try_again:
 		loff_t offset;
 
 		if (avail == 0)
-			return -ENOMEM;
+			return -ENOSPC;
 
 		for (i = dev_priv->fence_reg_start;
 		     i < dev_priv->num_fence_regs; i++) {
@@ -2242,7 +2262,7 @@ try_again:
 				i915_gem_flush(dev,
 					       I915_GEM_GPU_DOMAINS,
 					       I915_GEM_GPU_DOMAINS);
-				seqno = i915_add_request(dev,
+				seqno = i915_add_request(dev, NULL,
 							 I915_GEM_GPU_DOMAINS);
 				if (seqno == 0)
 					return -ENOMEM;
@@ -2254,9 +2274,6 @@ try_again:
 			goto try_again;
 		}
 
-		BUG_ON(old_obj_priv->active ||
-		       (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
 		/*
 		 * Zap this virtual mapping so we can set up a fence again
 		 * for this object next time we need it.
@@ -2361,7 +2378,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 		spin_unlock(&dev_priv->mm.active_list_lock);
 		if (lists_empty) {
 			DRM_ERROR("GTT full, but LRU list empty\n");
-			return -ENOMEM;
+			return -ENOSPC;
 		}
 
 		ret = i915_gem_evict_something(dev);
@@ -2406,8 +2423,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
 	 * a GPU cache
 	 */
-	BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
 	return 0;
 }
@@ -2424,6 +2441,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
 	if (obj_priv->pages == NULL)
 		return;
 
+	/* XXX: The 865 in particular appears to be weird in how it handles
+	 * cache flushing.  We haven't figured it out, but the
+	 * clflush+agp_chipset_flush doesn't appear to successfully get the
+	 * data visible to the PGU, while wbinvd + agp_chipset_flush does.
+	 */
+	if (IS_I865G(obj->dev)) {
+		wbinvd();
+		return;
+	}
+
 	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
 }
@@ -2439,7 +2466,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 
 	/* Queue the GPU write cache flushing we need. */
 	i915_gem_flush(dev, 0, obj->write_domain);
-	seqno = i915_add_request(dev, obj->write_domain);
+	seqno = i915_add_request(dev, NULL, obj->write_domain);
 	obj->write_domain = 0;
 	i915_gem_object_move_to_active(obj, seqno);
 }
@@ -3022,20 +3049,12 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int nbox = exec->num_cliprects;
 	int i = 0, count;
-	uint32_t	exec_start, exec_len;
+	uint32_t exec_start, exec_len;
 	RING_LOCALS;
 
 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
 	exec_len = (uint32_t) exec->batch_len;
 
-	if ((exec_start | exec_len) & 0x7) {
-		DRM_ERROR("alignment\n");
-		return -EINVAL;
-	}
-
-	if (!exec_start)
-		return -EINVAL;
-
 	count = nbox ? nbox : 1;
 
 	for (i = 0; i < count; i++) {
@@ -3076,6 +3095,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
@@ -3084,15 +3107,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
 	int ret = 0;
-	uint32_t seqno;
+	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
 
 	mutex_lock(&dev->struct_mutex);
-	seqno = i915_file_priv->mm.last_gem_throttle_seqno;
-	i915_file_priv->mm.last_gem_throttle_seqno =
-		i915_file_priv->mm.last_gem_seqno;
-	if (seqno)
-		ret = i915_wait_request(dev, seqno);
+	while (!list_empty(&i915_file_priv->mm.request_list)) {
+		struct drm_i915_gem_request *request;
+
+		request = list_first_entry(&i915_file_priv->mm.request_list,
+					   struct drm_i915_gem_request,
+					   client_list);
+
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		ret = i915_wait_request(dev, request->seqno);
+		if (ret != 0)
+			break;
+	}
 	mutex_unlock(&dev->struct_mutex);
+
 	return ret;
 }
@@ -3111,7 +3144,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
 		reloc_count += exec_list[i].relocation_count;
 	}
 
-	*relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER);
+	*relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
 	if (*relocs == NULL)
 		return -ENOMEM;
@@ -3125,8 +3158,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
 				     exec_list[i].relocation_count *
 				     sizeof(**relocs));
 		if (ret != 0) {
-			drm_free(*relocs, reloc_count * sizeof(**relocs),
-				 DRM_MEM_DRIVER);
+			drm_free_large(*relocs);
 			*relocs = NULL;
 			return -EFAULT;
 		}
@@ -3165,17 +3197,34 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
 	}
 
 err:
-	drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
+	drm_free_large(relocs);
 
 	return ret;
 }
 
+static int
+i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
+			   uint64_t exec_offset)
+{
+	uint32_t exec_start, exec_len;
+
+	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+	exec_len = (uint32_t) exec->batch_len;
+
+	if ((exec_start | exec_len) & 0x7)
+		return -EINVAL;
+
+	if (!exec_start)
+		return -EINVAL;
+
+	return 0;
+}
+
 int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
 	struct drm_gem_object **object_list = NULL;
@@ -3198,10 +3247,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 	/* Copy in the exec list from userland */
-	exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
-			       DRM_MEM_DRIVER);
-	object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
-				 DRM_MEM_DRIVER);
+	exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
+	object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
 	if (exec_list == NULL || object_list == NULL) {
 		DRM_ERROR("Failed to allocate exec or object list "
 			  "for %d buffers\n",
@@ -3302,7 +3349,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 			break;
 
 		/* error other than GTT full, or we've already tried again */
-		if (ret != -ENOMEM || pin_tries >= 1) {
+		if (ret != -ENOSPC || pin_tries >= 1) {
 			if (ret != -ERESTARTSYS)
 				DRM_ERROR("Failed to pin buffers %d\n", ret);
 			goto err;
@@ -3321,8 +3368,20 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	/* Set the pending read domains for the batch buffer to COMMAND */
 	batch_obj = object_list[args->buffer_count-1];
-	batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-	batch_obj->pending_write_domain = 0;
+	if (batch_obj->pending_write_domain) {
+		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+
+	/* Sanity check the batch buffer, prior to moving objects */
+	exec_offset = exec_list[args->buffer_count - 1].offset;
+	ret = i915_gem_check_execbuffer (args, exec_offset);
+	if (ret != 0) {
+		DRM_ERROR("execbuf with invalid offset/length\n");
+		goto err;
+	}
 
 	i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3353,7 +3412,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 			       dev->invalidate_domains,
 			       dev->flush_domains);
 		if (dev->flush_domains)
-			(void)i915_add_request(dev, dev->flush_domains);
+			(void)i915_add_request(dev, file_priv,
+					       dev->flush_domains);
 	}
 
 	for (i = 0; i < args->buffer_count; i++) {
@@ -3371,8 +3431,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	}
 #endif
 
-	exec_offset = exec_list[args->buffer_count - 1].offset;
-
 #if WATCH_EXEC
 	i915_gem_dump_object(batch_obj,
 			      args->batch_len,
@@ -3402,9 +3460,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	 * *some* interrupts representing completion of buffers that we can
 	 * wait on when trying to clear up gtt space).
 	 */
-	seqno = i915_add_request(dev, flush_domains);
+	seqno = i915_add_request(dev, file_priv, flush_domains);
 	BUG_ON(seqno == 0);
-	i915_file_priv->mm.last_gem_seqno = seqno;
 
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_gem_object *obj = object_list[i];
@@ -3462,10 +3519,8 @@ err:
 	}
 
 pre_mutex_err:
-	drm_free(object_list, sizeof(*object_list) * args->buffer_count,
-		 DRM_MEM_DRIVER);
-	drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
-		 DRM_MEM_DRIVER);
+	drm_free_large(object_list);
+	drm_free_large(exec_list);
 	drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
 		 DRM_MEM_DRIVER);
@@ -3512,8 +3567,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
 		atomic_inc(&dev->pin_count);
 		atomic_add(obj->size, &dev->pin_memory);
 		if (!obj_priv->active &&
-		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-					   I915_GEM_DOMAIN_GTT)) == 0 &&
+		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
 		    !list_empty(&obj_priv->list))
 			list_del_init(&obj_priv->list);
 	}
@@ -3540,8 +3594,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
 	 */
 	if (obj_priv->pin_count == 0) {
 		if (!obj_priv->active &&
-		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-					   I915_GEM_DOMAIN_GTT)) == 0)
+		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
 			list_move_tail(&obj_priv->list,
 				       &dev_priv->mm.inactive_list);
 		atomic_dec(&dev->pin_count);
@@ -3645,15 +3698,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	struct drm_gem_object *obj;
 	struct drm_i915_gem_object *obj_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 	if (obj == NULL) {
 		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
 			  args->handle);
-		mutex_unlock(&dev->struct_mutex);
 		return -EBADF;
 	}
 
+	mutex_lock(&dev->struct_mutex);
 	/* Update the active list for the hardware's current position.
 	 * Otherwise this only updates on a delayed timer or when irqs are
 	 * actually unmasked, and our working set ends up being larger than
@@ -3792,9 +3844,8 @@ i915_gem_idle(struct drm_device *dev)
 
 	/* Flush the GPU along with all non-CPU write domains
 	 */
-	i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
-		       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
 
 	if (seqno == 0) {
 		mutex_unlock(&dev->struct_mutex);
@@ -4344,3 +4395,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	drm_agp_chipset_flush(dev);
 	return 0;
 }
+
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+{
+	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+	/* Clean up our request list when the client is going away, so that
+	 * later retire_requests won't dereference our soon-to-be-gone
+	 * file_priv.
+	 */
+	mutex_lock(&dev->struct_mutex);
+	while (!list_empty(&i915_file_priv->mm.request_list))
+		list_del_init(i915_file_priv->mm.request_list.next);
+	mutex_unlock(&dev->struct_mutex);
+}
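
The largest behavioural change above is the i915_gem_ring_throttle() rework: instead of remembering a single throttle seqno per client, each client now keeps its requests on a per-file list, and the throttle waits on every request emitted more than 20 ms ago, stopping at the first recent one. Below is a minimal standalone sketch of that policy, not the driver code itself; the type and helper names (client_request, wait_for_request, now_ms) are invented for illustration, whereas the real driver walks a kernel list_head under dev->struct_mutex and compares jiffies with time_after_eq().

#include <stddef.h>

struct client_request {
	struct client_request *next;   /* submission order, oldest first */
	unsigned int seqno;            /* sequence number emitted with the request */
	unsigned long emitted_ms;      /* timestamp (ms) when the request was emitted */
};

/* Supplied by the surrounding program in this sketch. */
extern unsigned long now_ms(void);
extern int wait_for_request(unsigned int seqno);   /* returns 0 on success */

static int throttle(const struct client_request *oldest)
{
	unsigned long recent_enough = now_ms() - 20;
	int ret = 0;

	/* The cutoff is computed once, before the loop: re-reading the clock
	 * on every iteration could keep us here indefinitely whenever a frame
	 * takes longer than 20 ms to render. */
	for (; oldest != NULL; oldest = oldest->next) {
		if (oldest->emitted_ms >= recent_enough)
			break;                     /* the rest are newer still */

		ret = wait_for_request(oldest->seqno);
		if (ret != 0)                      /* interrupted or wedged: give up */
			break;
	}
	return ret;
}

The same per-client list is what makes the new i915_gem_release() hook above necessary: when a client exits, its requests must be unlinked so later request retirement does not touch freed per-file state.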