[PATCH] [Karmic] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU

Tue Sep 1 19:00:58 UTC 2009

Leann Ogasawara wrote:
> Hi Guys,
> 
> https://bugs.edge.launchpad.net/ubuntu/+source/linux/+bug/419264
> 
> Bug 419264 reports 100% CPU usage with the latest mesa/libdrm updates.
> Upstream noted that the following kernel patch should resolve the issue.
> I posted a test kernel with the backported patch to Karmic and have
> received confirmations that the bug no longer appears.  Unfortunately,
> it seems this patch will not hit the 2.6.31 release window in time.
> Please consider carrying until it is merged upstream.
> 
> The following changes since commit a48b3d2c10a457e2dafed8cf924f2e6d7fdeb695:
>   Stefan Bader (1):
>         UBUNTU: SAUCE: Remove ov511 driver from ubuntu subdirectory
> 
> are available in the git repository at:
> 
>   git://kernel.ubuntu.com/ogasawara/ubuntu-karmic.git lp419264
> 
> Leann Ogasawara (1):
>       UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU
> 
>  drivers/gpu/drm/i915/i915_drv.h |    6 +++
>  drivers/gpu/drm/i915/i915_gem.c |   86 ++++++++++++++++++++++-----------------
>  2 files changed, 54 insertions(+), 38 deletions(-)
> 
>>From 9d2e3cff7532fb4ffb5a470d20e8d16df3337a57 Mon Sep 17 00:00:00 2001
> From: Leann Ogasawara <leann.ogasawara at canonical.com>
> Date: Mon, 31 Aug 2009 10:23:08 -0700
> Subject: [PATCH] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU
> 
> OriginalLocation: http://patchwork.kernel.org/patch/44710/
> BugLink: http://bugs.launchpad.net/bugs/419264
> 
> The lack of a proper LRU was partially worked around by taking the fence
> with the object containing the oldest seqno to steal from.  But if there
> are multiple objects inactive, then they don't have seqnos and the first
> in fence reg among them would be chosen.  If you were trying to copy
> data between two mappings, this could result in each page fault stealing
> the fence from the other argument, and your application hanging.
> 
> https://bugs.freedesktop.org/show_bug.cgi?id=23566
> https://bugs.freedesktop.org/show_bug.cgi?id=23220
> https://bugs.freedesktop.org/show_bug.cgi?id=23253
> https://bugs.freedesktop.org/show_bug.cgi?id=23366
> 
> Signed-off-by: Eric Anholt <eric at anholt.net>
> Signed-off-by: Leann Ogasawara <leann.ogasawara at canonical.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |    6 +++
>  drivers/gpu/drm/i915/i915_gem.c |   86 ++++++++++++++++++++++-----------------
>  2 files changed, 54 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7537f57..11fc4b6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -384,6 +384,9 @@ typedef struct drm_i915_private {
>  		 */
>  		struct list_head inactive_list;
>  
> +		/** LRU list of objects with fence regs on them. */
> +		struct list_head fence_list;
> +
>  		/**
>  		 * List of breadcrumbs associated with GPU requests currently
>  		 * outstanding.
> @@ -451,6 +454,9 @@ struct drm_i915_gem_object {
>  	/** This object's place on the active/flushing/inactive lists */
>  	struct list_head list;
>  
> +	/** This object's place on the fenced object LRU */
> +	struct list_head fence_list;
> +
>  	/**
>  	 * This is set if the object is on the active or flushing lists
>  	 * (has pending rendering), and is not set if it's on inactive (ready
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 140bee1..0c07a75 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -978,6 +978,7 @@ int
>  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  			  struct drm_file *file_priv)
>  {
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_set_domain *args = data;
>  	struct drm_gem_object *obj;
>  	uint32_t read_domains = args->read_domains;
> @@ -1010,8 +1011,18 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  		 obj, obj->size, read_domains, write_domain);
>  #endif
>  	if (read_domains & I915_GEM_DOMAIN_GTT) {
> +		struct drm_i915_gem_object *obj_priv = obj->driver_private;
> +
>  		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
>  
> +		/* Update the LRU on the fence for the CPU access that's
> +		 * about to occur.
> +		 */
> +		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
> +			list_move_tail(&obj_priv->fence_list,
> +				       &dev_priv->mm.fence_list);
> +		}
> +
>  		/* Silently promote "you're not bound, there was nothing to do"
>  		 * to success, since the client was just asking us to
>  		 * make sure everything was done.
> @@ -1155,8 +1166,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Need a new fence register? */
> -	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
> -	    obj_priv->tiling_mode != I915_TILING_NONE) {
> +	if (obj_priv->tiling_mode != I915_TILING_NONE) {
>  		ret = i915_gem_object_get_fence_reg(obj);
>  		if (ret) {
>  			mutex_unlock(&dev->struct_mutex);
> @@ -2208,6 +2218,12 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
>  	struct drm_i915_gem_object *old_obj_priv = NULL;
>  	int i, ret, avail;
>  
> +	/* Just update our place in the LRU if our fence is getting used. */
> +	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
> +		list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
> +		return 0;
> +	}
> +
>  	switch (obj_priv->tiling_mode) {
>  	case I915_TILING_NONE:
>  		WARN(1, "allocating a fence for non-tiled object?\n");
> @@ -2229,7 +2245,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
>  	}
>  
>  	/* First try to find a free reg */
> -try_again:
>  	avail = 0;
>  	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
>  		reg = &dev_priv->fence_regs[i];
> @@ -2243,52 +2258,41 @@ try_again:
>  
>  	/* None available, try to steal one or wait for a user to finish */
>  	if (i == dev_priv->num_fence_regs) {
> -		uint32_t seqno = dev_priv->mm.next_gem_seqno;
> +		struct drm_gem_object *old_obj = NULL;
>  
>  		if (avail == 0)
>  			return -ENOSPC;
>  
> -		for (i = dev_priv->fence_reg_start;
> -		     i < dev_priv->num_fence_regs; i++) {
> -			uint32_t this_seqno;
> +		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
> +				    fence_list) {
> +			old_obj = old_obj_priv->obj;
>  
> -			reg = &dev_priv->fence_regs[i];
> -			old_obj_priv = reg->obj->driver_private;
> +			reg = &dev_priv->fence_regs[old_obj_priv->fence_reg];
>  
>  			if (old_obj_priv->pin_count)
>  				continue;
>  
> +			/* Take a reference, as otherwise the wait_rendering
> +			 * below may cause the object to get freed out from
> +			 * under us.
> +			 */
> +			drm_gem_object_reference(old_obj);
> +
>  			/* i915 uses fences for GPU access to tiled buffers */
>  			if (IS_I965G(dev) || !old_obj_priv->active)
>  				break;
>  
> -			/* find the seqno of the first available fence */
> -			this_seqno = old_obj_priv->last_rendering_seqno;
> -			if (this_seqno != 0 &&
> -			    reg->obj->write_domain == 0 &&
> -			    i915_seqno_passed(seqno, this_seqno))
> -				seqno = this_seqno;
> -		}
> -
> -		/*
> -		 * Now things get ugly... we have to wait for one of the
> -		 * objects to finish before trying again.
> -		 */
> -		if (i == dev_priv->num_fence_regs) {
> -			if (seqno == dev_priv->mm.next_gem_seqno) {
> -				i915_gem_flush(dev,
> -					       I915_GEM_GPU_DOMAINS,
> -					       I915_GEM_GPU_DOMAINS);
> -				seqno = i915_add_request(dev, NULL,
> -							 I915_GEM_GPU_DOMAINS);
> -				if (seqno == 0)
> -					return -ENOMEM;
> -			}
> -
> -			ret = i915_wait_request(dev, seqno);
> -			if (ret)
> +			/* This brings the object to the head of the LRU if it
> +			 * had been written to.  The only way this should
> +			 * result in us waiting longer than the expected
> +			 * optimal amount of time is if there was a
> +			 * fence-using buffer later that was read-only.
> +			 */
> +			i915_gem_object_flush_gpu_write_domain(old_obj);
> +			ret = i915_gem_object_wait_rendering(old_obj);
> +			if (ret != 0)
>  				return ret;
> -			goto try_again;
> +			break;
>  		}
>  
>  		/*
> @@ -2296,10 +2300,15 @@ try_again:
>  		 * for this object next time we need it.
>  		 */
>  		i915_gem_release_mmap(reg->obj);
> +		i = old_obj_priv->fence_reg;
>  		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
> +		list_del_init(&old_obj_priv->fence_list);
> +		drm_gem_object_unreference(old_obj);
>  	}
>  
>  	obj_priv->fence_reg = i;
> +	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
> +
>  	reg->obj = obj;
>  
>  	if (IS_I965G(dev))
> @@ -2342,6 +2351,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
>  
>  	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
>  	obj_priv->fence_reg = I915_FENCE_REG_NONE;
> +	list_del_init(&obj_priv->fence_list);
>  }
>  
>  /**
> @@ -3595,9 +3605,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
>  	 * Pre-965 chips need a fence register set up in order to
>  	 * properly handle tiled surfaces.
>  	 */
> -	if (!IS_I965G(dev) &&
> -	    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
> -	    obj_priv->tiling_mode != I915_TILING_NONE) {
> +	if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
>  		ret = i915_gem_object_get_fence_reg(obj);
>  		if (ret != 0) {
>  			if (ret != -EBUSY && ret != -ERESTARTSYS)
> @@ -3806,6 +3814,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
>  	obj_priv->obj = obj;
>  	obj_priv->fence_reg = I915_FENCE_REG_NONE;
>  	INIT_LIST_HEAD(&obj_priv->list);
> +	INIT_LIST_HEAD(&obj_priv->fence_list);
>  
>  	return 0;
>  }
> @@ -4253,6 +4262,7 @@ i915_gem_load(struct drm_device *dev)
>  	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.request_list);
> +	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
>  	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
>  			  i915_gem_retire_work_handler);
>  	dev_priv->mm.next_gem_seqno = 1;

applied - I've encountered this one myself (I think), if a locked X 
session is any indication.

-- 
Tim Gardner tim.gardner at canonical.com