ACK: [PATCH Trusty regression SRU] aio: fix reqs_available handling
Colin Ian King
colin.king at canonical.com
Fri Nov 11 15:53:53 UTC 2016
On 11/11/16 15:50, Tim Gardner wrote:
> From: Benjamin LaHaise <bcrl at kvack.org>
>
> BugLink: http://bugs.launchpad.net/bugs/1641129
>
> As reported by Dan Aloni, commit f8567a3845ac ("aio: fix aio request
> leak when events are reaped by userspace") introduces a regression when
> user code attempts to perform io_submit() with more events than are
> available in the ring buffer. Reverting that commit would reintroduce a
> regression when user space event reaping is used.
>
> Fixing this bug is a bit more involved than the previous attempts to fix
> this regression. Since we do not have a single point at which we can
> count events as being reaped by user space and io_getevents(), we have
> to track event completion by looking at the number of events left in the
> event ring. So long as there are as many events in the ring buffer as
> there have been completion events generate, we cannot call
> put_reqs_available(). The code to check for this is now placed in
> refill_reqs_available().
>
> A test program from Dan and modified by me for verifying this bug is available
> at http://www.kvack.org/~bcrl/20140824-aio_bug.c .
>
> Reported-by: Dan Aloni <dan at kernelim.com>
> Signed-off-by: Benjamin LaHaise <bcrl at kvack.org>
> Acked-by: Dan Aloni <dan at kernelim.com>
> Cc: Kent Overstreet <kmo at daterainc.com>
> Cc: Mateusz Guzik <mguzik at redhat.com>
> Cc: Petr Matousek <pmatouse at redhat.com>
> Cc: stable at vger.kernel.org # v3.16 and anything that f8567a3845ac was backported to
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> (cherry picked from commit d856f32a86b2b015ab180ab7a55e455ed8d3ccc5)
> Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
> ---
> fs/aio.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 73 insertions(+), 4 deletions(-)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 775476b..db7adac 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -136,6 +136,7 @@ struct kioctx {
>
> struct {
> unsigned tail;
> + unsigned completed_events;
> spinlock_t completion_lock;
> } ____cacheline_aligned_in_smp;
>
> @@ -876,6 +877,68 @@ out:
> return ret;
> }
>
> +/* refill_reqs_available
> + * Updates the reqs_available reference counts used for tracking the
> + * number of free slots in the completion ring. This can be called
> + * from aio_complete() (to optimistically update reqs_available) or
> + * from aio_get_req() (the we're out of events case). It must be
> + * called holding ctx->completion_lock.
> + */
> +static void refill_reqs_available(struct kioctx *ctx, unsigned head,
> + unsigned tail)
> +{
> + unsigned events_in_ring, completed;
> +
> + /* Clamp head since userland can write to it. */
> + head %= ctx->nr_events;
> + if (head <= tail)
> + events_in_ring = tail - head;
> + else
> + events_in_ring = ctx->nr_events - (head - tail);
> +
> + completed = ctx->completed_events;
> + if (events_in_ring < completed)
> + completed -= events_in_ring;
> + else
> + completed = 0;
> +
> + if (!completed)
> + return;
> +
> + ctx->completed_events -= completed;
> + put_reqs_available(ctx, completed);
> +}
> +
> +/* user_refill_reqs_available
> + * Called to refill reqs_available when aio_get_req() encounters an
> + * out of space in the completion ring.
> + */
> +static void user_refill_reqs_available(struct kioctx *ctx)
> +{
> + spin_lock_irq(&ctx->completion_lock);
> + if (ctx->completed_events) {
> + struct aio_ring *ring;
> + unsigned head;
> +
> + /* Access of ring->head may race with aio_read_events_ring()
> + * here, but that's okay since whether we read the old version
> + * or the new version, and either will be valid. The important
> + * part is that head cannot pass tail since we prevent
> + * aio_complete() from updating tail by holding
> + * ctx->completion_lock. Even if head is invalid, the check
> + * against ctx->completed_events below will make sure we do the
> + * safe/right thing.
> + */
> + ring = kmap_atomic(ctx->ring_pages[0]);
> + head = ring->head;
> + kunmap_atomic(ring);
> +
> + refill_reqs_available(ctx, head, ctx->tail);
> + }
> +
> + spin_unlock_irq(&ctx->completion_lock);
> +}
> +
> /* aio_get_req
> * Allocate a slot for an aio request.
> * Returns NULL if no requests are free.
> @@ -884,8 +947,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
> {
> struct kiocb *req;
>
> - if (!get_reqs_available(ctx))
> - return NULL;
> + if (!get_reqs_available(ctx)) {
> + user_refill_reqs_available(ctx);
> + if (!get_reqs_available(ctx))
> + return NULL;
> + }
>
> req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
> if (unlikely(!req))
> @@ -944,8 +1010,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
> struct kioctx *ctx = iocb->ki_ctx;
> struct aio_ring *ring;
> struct io_event *ev_page, *event;
> + unsigned tail, pos, head;
> unsigned long flags;
> - unsigned tail, pos;
>
> /*
> * Special case handling for sync iocbs:
> @@ -1006,10 +1072,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
> ctx->tail = tail;
>
> ring = kmap_atomic(ctx->ring_pages[0]);
> + head = ring->head;
> ring->tail = tail;
> kunmap_atomic(ring);
> flush_dcache_page(ctx->ring_pages[0]);
>
> + ctx->completed_events++;
> + if (ctx->completed_events > 1)
> + refill_reqs_available(ctx, head, tail);
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
>
> pr_debug("added to ring %p at [%u]\n", iocb, tail);
> @@ -1024,7 +1094,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
>
> /* everything turned out well, dispose of the aiocb. */
> kiocb_free(iocb);
> - put_reqs_available(ctx, 1);
>
> /*
> * We have to order our ring_info tail store above and test
>
Clean upstream cherry pick, testable and sane.
Acked-by: Colin Ian King <colin.king at canonical.com>
More information about the kernel-team
mailing list