[PATCH] [Karmic] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU
Tim Gardner
tim.gardner at canonical.com
Tue Sep 1 19:00:58 UTC 2009
Leann Ogasawara wrote:
> Hi Guys,
>
> https://bugs.edge.launchpad.net/ubuntu/+source/linux/+bug/419264
>
> Bug 419264 reports 100% CPU usage with the latest mesa/libdrm updates.
> Upstream noted that the following kernel patch should resolve the issue.
> I posted a test kernel with the backported patch to Karmic and have
> received confirmations that the bug no longer appears. Unfortunately,
> it seems this patch will not hit the 2.6.31 release window in time.
> Please consider carrying until it is merged upstream.
>
> The following changes since commit a48b3d2c10a457e2dafed8cf924f2e6d7fdeb695:
> Stefan Bader (1):
> UBUNTU: SAUCE: Remove ov511 driver from ubuntu subdirectory
>
> are available in the git repository at:
>
> git://kernel.ubuntu.com/ogasawara/ubuntu-karmic.git lp419264
>
> Leann Ogasawara (1):
> UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU
>
> drivers/gpu/drm/i915/i915_drv.h | 6 +++
> drivers/gpu/drm/i915/i915_gem.c | 86 ++++++++++++++++++++++-----------------
> 2 files changed, 54 insertions(+), 38 deletions(-)
>
>>From 9d2e3cff7532fb4ffb5a470d20e8d16df3337a57 Mon Sep 17 00:00:00 2001
> From: Leann Ogasawara <leann.ogasawara at canonical.com>
> Date: Mon, 31 Aug 2009 10:23:08 -0700
> Subject: [PATCH] UBUNTU: [Upstream] drm/i915: Fix CPU-spinning hangs related to fence usage using an LRU
>
> OriginalLocation: http://patchwork.kernel.org/patch/44710/
> BugLink: http://bugs.launchpad.net/bugs/419264
>
> The lack of a proper LRU was partially worked around by taking the fence
> with the object containing the oldest seqno to steal from. But if there
> are multiple objects inactive, then they don't have seqnos and the first
> in fence reg among them would be chosen. If you were trying to copy
> data between two mappings, this could result in each page fault stealing
> the fence from the other argument, and your application hanging.
>
> https://bugs.freedesktop.org/show_bug.cgi?id=23566
> https://bugs.freedesktop.org/show_bug.cgi?id=23220
> https://bugs.freedesktop.org/show_bug.cgi?id=23253
> https://bugs.freedesktop.org/show_bug.cgi?id=23366
>
> Signed-off-by: Eric Anholt <eric at anholt.net>
> Signed-off-by: Leann Ogasawara <leann.ogasawara at canonical.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 +++
> drivers/gpu/drm/i915/i915_gem.c | 86 ++++++++++++++++++++++-----------------
> 2 files changed, 54 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7537f57..11fc4b6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -384,6 +384,9 @@ typedef struct drm_i915_private {
> */
> struct list_head inactive_list;
>
> + /** LRU list of objects with fence regs on them. */
> + struct list_head fence_list;
> +
> /**
> * List of breadcrumbs associated with GPU requests currently
> * outstanding.
> @@ -451,6 +454,9 @@ struct drm_i915_gem_object {
> /** This object's place on the active/flushing/inactive lists */
> struct list_head list;
>
> + /** This object's place on the fenced object LRU */
> + struct list_head fence_list;
> +
> /**
> * This is set if the object is on the active or flushing lists
> * (has pending rendering), and is not set if it's on inactive (ready
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 140bee1..0c07a75 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -978,6 +978,7 @@ int
> i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv)
> {
> + struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_i915_gem_set_domain *args = data;
> struct drm_gem_object *obj;
> uint32_t read_domains = args->read_domains;
> @@ -1010,8 +1011,18 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
> obj, obj->size, read_domains, write_domain);
> #endif
> if (read_domains & I915_GEM_DOMAIN_GTT) {
> + struct drm_i915_gem_object *obj_priv = obj->driver_private;
> +
> ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
>
> + /* Update the LRU on the fence for the CPU access that's
> + * about to occur.
> + */
> + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
> + list_move_tail(&obj_priv->fence_list,
> + &dev_priv->mm.fence_list);
> + }
> +
> /* Silently promote "you're not bound, there was nothing to do"
> * to success, since the client was just asking us to
> * make sure everything was done.
> @@ -1155,8 +1166,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> /* Need a new fence register? */
> - if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
> - obj_priv->tiling_mode != I915_TILING_NONE) {
> + if (obj_priv->tiling_mode != I915_TILING_NONE) {
> ret = i915_gem_object_get_fence_reg(obj);
> if (ret) {
> mutex_unlock(&dev->struct_mutex);
> @@ -2208,6 +2218,12 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
> struct drm_i915_gem_object *old_obj_priv = NULL;
> int i, ret, avail;
>
> + /* Just update our place in the LRU if our fence is getting used. */
> + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
> + list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
> + return 0;
> + }
> +
> switch (obj_priv->tiling_mode) {
> case I915_TILING_NONE:
> WARN(1, "allocating a fence for non-tiled object?\n");
> @@ -2229,7 +2245,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
> }
>
> /* First try to find a free reg */
> -try_again:
> avail = 0;
> for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
> reg = &dev_priv->fence_regs[i];
> @@ -2243,52 +2258,41 @@ try_again:
>
> /* None available, try to steal one or wait for a user to finish */
> if (i == dev_priv->num_fence_regs) {
> - uint32_t seqno = dev_priv->mm.next_gem_seqno;
> + struct drm_gem_object *old_obj = NULL;
>
> if (avail == 0)
> return -ENOSPC;
>
> - for (i = dev_priv->fence_reg_start;
> - i < dev_priv->num_fence_regs; i++) {
> - uint32_t this_seqno;
> + list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
> + fence_list) {
> + old_obj = old_obj_priv->obj;
>
> - reg = &dev_priv->fence_regs[i];
> - old_obj_priv = reg->obj->driver_private;
> + reg = &dev_priv->fence_regs[old_obj_priv->fence_reg];
>
> if (old_obj_priv->pin_count)
> continue;
>
> + /* Take a reference, as otherwise the wait_rendering
> + * below may cause the object to get freed out from
> + * under us.
> + */
> + drm_gem_object_reference(old_obj);
> +
> /* i915 uses fences for GPU access to tiled buffers */
> if (IS_I965G(dev) || !old_obj_priv->active)
> break;
>
> - /* find the seqno of the first available fence */
> - this_seqno = old_obj_priv->last_rendering_seqno;
> - if (this_seqno != 0 &&
> - reg->obj->write_domain == 0 &&
> - i915_seqno_passed(seqno, this_seqno))
> - seqno = this_seqno;
> - }
> -
> - /*
> - * Now things get ugly... we have to wait for one of the
> - * objects to finish before trying again.
> - */
> - if (i == dev_priv->num_fence_regs) {
> - if (seqno == dev_priv->mm.next_gem_seqno) {
> - i915_gem_flush(dev,
> - I915_GEM_GPU_DOMAINS,
> - I915_GEM_GPU_DOMAINS);
> - seqno = i915_add_request(dev, NULL,
> - I915_GEM_GPU_DOMAINS);
> - if (seqno == 0)
> - return -ENOMEM;
> - }
> -
> - ret = i915_wait_request(dev, seqno);
> - if (ret)
> + /* This brings the object to the head of the LRU if it
> + * had been written to. The only way this should
> + * result in us waiting longer than the expected
> + * optimal amount of time is if there was a
> + * fence-using buffer later that was read-only.
> + */
> + i915_gem_object_flush_gpu_write_domain(old_obj);
> + ret = i915_gem_object_wait_rendering(old_obj);
> + if (ret != 0)
> return ret;
> - goto try_again;
> + break;
> }
>
> /*
> @@ -2296,10 +2300,15 @@ try_again:
> * for this object next time we need it.
> */
> i915_gem_release_mmap(reg->obj);
> + i = old_obj_priv->fence_reg;
> old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
> + list_del_init(&old_obj_priv->fence_list);
> + drm_gem_object_unreference(old_obj);
> }
>
> obj_priv->fence_reg = i;
> + list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
> +
> reg->obj = obj;
>
> if (IS_I965G(dev))
> @@ -2342,6 +2351,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
>
> dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
> obj_priv->fence_reg = I915_FENCE_REG_NONE;
> + list_del_init(&obj_priv->fence_list);
> }
>
> /**
> @@ -3595,9 +3605,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
> * Pre-965 chips need a fence register set up in order to
> * properly handle tiled surfaces.
> */
> - if (!IS_I965G(dev) &&
> - obj_priv->fence_reg == I915_FENCE_REG_NONE &&
> - obj_priv->tiling_mode != I915_TILING_NONE) {
> + if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
> ret = i915_gem_object_get_fence_reg(obj);
> if (ret != 0) {
> if (ret != -EBUSY && ret != -ERESTARTSYS)
> @@ -3806,6 +3814,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
> obj_priv->obj = obj;
> obj_priv->fence_reg = I915_FENCE_REG_NONE;
> INIT_LIST_HEAD(&obj_priv->list);
> + INIT_LIST_HEAD(&obj_priv->fence_list);
>
> return 0;
> }
> @@ -4253,6 +4262,7 @@ i915_gem_load(struct drm_device *dev)
> INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
> INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> INIT_LIST_HEAD(&dev_priv->mm.request_list);
> + INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
> i915_gem_retire_work_handler);
> dev_priv->mm.next_gem_seqno = 1;
applied - I've encountered this one myself (I think), if a locked X
session is any indication.
--
Tim Gardner tim.gardner at canonical.com
More information about the kernel-team
mailing list