[PATCH] [Karmic] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU

Leann Ogasawara leann.ogasawara at canonical.com
Tue Sep 1 18:16:40 UTC 2009


Hi Guys,

https://bugs.edge.launchpad.net/ubuntu/+source/linux/+bug/419264

Bug 419264 reports 100% CPU usage with the latest mesa/libdrm updates.
Upstream noted that the following kernel patch should resolve the issue.
I posted a test kernel with the backported patch to Karmic and have
received confirmations that the bug no longer appears.  Unfortunately,
it seems this patch will not hit the 2.6.31 release window in time.
Please consider carrying until it is merged upstream.

The following changes since commit a48b3d2c10a457e2dafed8cf924f2e6d7fdeb695:
  Stefan Bader (1):
        UBUNTU: SAUCE: Remove ov511 driver from ubuntu subdirectory

are available in the git repository at:

  git://kernel.ubuntu.com/ogasawara/ubuntu-karmic.git lp419264

Leann Ogasawara (1):
      UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU

 drivers/gpu/drm/i915/i915_drv.h |    6 +++
 drivers/gpu/drm/i915/i915_gem.c |   86 ++++++++++++++++++++++-----------------
 2 files changed, 54 insertions(+), 38 deletions(-)

>From 9d2e3cff7532fb4ffb5a470d20e8d16df3337a57 Mon Sep 17 00:00:00 2001
From: Leann Ogasawara <leann.ogasawara at canonical.com>
Date: Mon, 31 Aug 2009 10:23:08 -0700
Subject: [PATCH] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU

OriginalLocation: http://patchwork.kernel.org/patch/44710/
BugLink: http://bugs.launchpad.net/bugs/419264

The lack of a proper LRU was partially worked around by taking the fence
with the object containing the oldest seqno to steal from.  But if there
are multiple objects inactive, then they don't have seqnos and the first
in fence reg among them would be chosen.  If you were trying to copy
data between two mappings, this could result in each page fault stealing
the fence from the other argument, and your application hanging.

https://bugs.freedesktop.org/show_bug.cgi?id=23566
https://bugs.freedesktop.org/show_bug.cgi?id=23220
https://bugs.freedesktop.org/show_bug.cgi?id=23253
https://bugs.freedesktop.org/show_bug.cgi?id=23366

Signed-off-by: Eric Anholt <eric at anholt.net>
Signed-off-by: Leann Ogasawara <leann.ogasawara at canonical.com>
---
 drivers/gpu/drm/i915/i915_drv.h |    6 +++
 drivers/gpu/drm/i915/i915_gem.c |   86 ++++++++++++++++++++++-----------------
 2 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7537f57..11fc4b6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -384,6 +384,9 @@ typedef struct drm_i915_private {
 		 */
 		struct list_head inactive_list;
 
+		/** LRU list of objects with fence regs on them. */
+		struct list_head fence_list;
+
 		/**
 		 * List of breadcrumbs associated with GPU requests currently
 		 * outstanding.
@@ -451,6 +454,9 @@ struct drm_i915_gem_object {
 	/** This object's place on the active/flushing/inactive lists */
 	struct list_head list;
 
+	/** This object's place on the fenced object LRU */
+	struct list_head fence_list;
+
 	/**
 	 * This is set if the object is on the active or flushing lists
 	 * (has pending rendering), and is not set if it's on inactive (ready
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 140bee1..0c07a75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -978,6 +978,7 @@ int
 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_set_domain *args = data;
 	struct drm_gem_object *obj;
 	uint32_t read_domains = args->read_domains;
@@ -1010,8 +1011,18 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		 obj, obj->size, read_domains, write_domain);
 #endif
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
+		/* Update the LRU on the fence for the CPU access that's
+		 * about to occur.
+		 */
+		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+			list_move_tail(&obj_priv->fence_list,
+				       &dev_priv->mm.fence_list);
+		}
+
 		/* Silently promote "you're not bound, there was nothing to do"
 		 * to success, since the client was just asking us to
 		 * make sure everything was done.
@@ -1155,8 +1166,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Need a new fence register? */
-	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret) {
 			mutex_unlock(&dev->struct_mutex);
@@ -2208,6 +2218,12 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	struct drm_i915_gem_object *old_obj_priv = NULL;
 	int i, ret, avail;
 
+	/* Just update our place in the LRU if our fence is getting used. */
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+		list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+		return 0;
+	}
+
 	switch (obj_priv->tiling_mode) {
 	case I915_TILING_NONE:
 		WARN(1, "allocating a fence for non-tiled object?\n");
@@ -2229,7 +2245,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	}
 
 	/* First try to find a free reg */
-try_again:
 	avail = 0;
 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
 		reg = &dev_priv->fence_regs[i];
@@ -2243,52 +2258,41 @@ try_again:
 
 	/* None available, try to steal one or wait for a user to finish */
 	if (i == dev_priv->num_fence_regs) {
-		uint32_t seqno = dev_priv->mm.next_gem_seqno;
+		struct drm_gem_object *old_obj = NULL;
 
 		if (avail == 0)
 			return -ENOSPC;
 
-		for (i = dev_priv->fence_reg_start;
-		     i < dev_priv->num_fence_regs; i++) {
-			uint32_t this_seqno;
+		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
+				    fence_list) {
+			old_obj = old_obj_priv->obj;
 
-			reg = &dev_priv->fence_regs[i];
-			old_obj_priv = reg->obj->driver_private;
+			reg = &dev_priv->fence_regs[old_obj_priv->fence_reg];
 
 			if (old_obj_priv->pin_count)
 				continue;
 
+			/* Take a reference, as otherwise the wait_rendering
+			 * below may cause the object to get freed out from
+			 * under us.
+			 */
+			drm_gem_object_reference(old_obj);
+
 			/* i915 uses fences for GPU access to tiled buffers */
 			if (IS_I965G(dev) || !old_obj_priv->active)
 				break;
 
-			/* find the seqno of the first available fence */
-			this_seqno = old_obj_priv->last_rendering_seqno;
-			if (this_seqno != 0 &&
-			    reg->obj->write_domain == 0 &&
-			    i915_seqno_passed(seqno, this_seqno))
-				seqno = this_seqno;
-		}
-
-		/*
-		 * Now things get ugly... we have to wait for one of the
-		 * objects to finish before trying again.
-		 */
-		if (i == dev_priv->num_fence_regs) {
-			if (seqno == dev_priv->mm.next_gem_seqno) {
-				i915_gem_flush(dev,
-					       I915_GEM_GPU_DOMAINS,
-					       I915_GEM_GPU_DOMAINS);
-				seqno = i915_add_request(dev, NULL,
-							 I915_GEM_GPU_DOMAINS);
-				if (seqno == 0)
-					return -ENOMEM;
-			}
-
-			ret = i915_wait_request(dev, seqno);
-			if (ret)
+			/* This brings the object to the head of the LRU if it
+			 * had been written to.  The only way this should
+			 * result in us waiting longer than the expected
+			 * optimal amount of time is if there was a
+			 * fence-using buffer later that was read-only.
+			 */
+			i915_gem_object_flush_gpu_write_domain(old_obj);
+			ret = i915_gem_object_wait_rendering(old_obj);
+			if (ret != 0)
 				return ret;
-			goto try_again;
+			break;
 		}
 
 		/*
@@ -2296,10 +2300,15 @@ try_again:
 		 * for this object next time we need it.
 		 */
 		i915_gem_release_mmap(reg->obj);
+		i = old_obj_priv->fence_reg;
 		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+		list_del_init(&old_obj_priv->fence_list);
+		drm_gem_object_unreference(old_obj);
 	}
 
 	obj_priv->fence_reg = i;
+	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+
 	reg->obj = obj;
 
 	if (IS_I965G(dev))
@@ -2342,6 +2351,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
 
 	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	list_del_init(&obj_priv->fence_list);
 }
 
 /**
@@ -3595,9 +3605,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
 	 * Pre-965 chips need a fence register set up in order to
 	 * properly handle tiled surfaces.
 	 */
-	if (!IS_I965G(dev) &&
-	    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret != 0) {
 			if (ret != -EBUSY && ret != -ERESTARTSYS)
@@ -3806,6 +3814,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	obj_priv->obj = obj;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+	INIT_LIST_HEAD(&obj_priv->fence_list);
 
 	return 0;
 }
@@ -4253,6 +4262,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.request_list);
+	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
 	dev_priv->mm.next_gem_seqno = 1;
-- 
1.6.3.3









More information about the kernel-team mailing list