ACK: [B][C][SRU][PATCH 1/1] z3fold: fix possible reclaim races

Kleber Souza kleber.souza at canonical.com
Tue Apr 23 09:08:48 UTC 2019


On 4/17/19 5:30 AM, Po-Hsu Lin wrote:
> From: Vitaly Wool <vitalywool at gmail.com>
> 
> https://bugs.launchpad.net/bugs/1814874
> 
> Reclaim and free can race on an object which is basically fine but in
> order for reclaim to be able to map "freed" object we need to encode
> object length in the handle.  handle_to_chunks() is then introduced to
> extract object length from a handle and use it during mapping.
> 
> Moreover, to avoid racing on a z3fold "headless" page release, we should
> not try to free that page in z3fold_free() if the reclaim bit is set.
> Also, in the unlikely case of trying to reclaim a page being freed, we
> should not proceed with that page.
> 
> While at it, fix the page accounting in reclaim function.
> 
> This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups".
> 
> Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com
> Signed-off-by: Vitaly Wool <vitaly.vul at sony.com>
> Signed-off-by: Jongseok Kim <ks77sj at gmail.com>
> Reported-by: Jongseok Kim <ks77sj at gmail.com>
> Reviewed-by: Snild Dolkow <snild at sony.com>
> Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> (cherry picked from commit ca0246bb97c23da9d267c2107c07fb77e38205c9)
> Signed-off-by: Po-Hsu Lin <po-hsu.lin at canonical.com>

Clean cherry-pick, reproducible and tested.

Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>

> ---
>  mm/z3fold.c | 101 +++++++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 62 insertions(+), 39 deletions(-)
> 
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 4b366d1..aee9b0b 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -99,6 +99,7 @@ struct z3fold_header {
>  #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
>  
>  #define BUDDY_MASK	(0x3)
> +#define BUDDY_SHIFT	2
>  
>  /**
>   * struct z3fold_pool - stores metadata for each z3fold pool
> @@ -145,7 +146,7 @@ enum z3fold_page_flags {
>  	MIDDLE_CHUNK_MAPPED,
>  	NEEDS_COMPACTING,
>  	PAGE_STALE,
> -	UNDER_RECLAIM
> +	PAGE_CLAIMED, /* by either reclaim or free */
>  };
>  
>  /*****************
> @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
>  	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>  	clear_bit(NEEDS_COMPACTING, &page->private);
>  	clear_bit(PAGE_STALE, &page->private);
> -	clear_bit(UNDER_RECLAIM, &page->private);
> +	clear_bit(PAGE_CLAIMED, &page->private);
>  
>  	spin_lock_init(&zhdr->page_lock);
>  	kref_init(&zhdr->refcount);
> @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
>  	unsigned long handle;
>  
>  	handle = (unsigned long)zhdr;
> -	if (bud != HEADLESS)
> -		handle += (bud + zhdr->first_num) & BUDDY_MASK;
> +	if (bud != HEADLESS) {
> +		handle |= (bud + zhdr->first_num) & BUDDY_MASK;
> +		if (bud == LAST)
> +			handle |= (zhdr->last_chunks << BUDDY_SHIFT);
> +	}
>  	return handle;
>  }
>  
> @@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
>  	return (struct z3fold_header *)(handle & PAGE_MASK);
>  }
>  
> +/* only for LAST bud, returns zero otherwise */
> +static unsigned short handle_to_chunks(unsigned long handle)
> +{
> +	return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
> +}
> +
>  /*
>   * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
>   *  but that doesn't matter. because the masking will result in the
> @@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>  	page = virt_to_page(zhdr);
>  
>  	if (test_bit(PAGE_HEADLESS, &page->private)) {
> -		/* HEADLESS page stored */
> -		bud = HEADLESS;
> -	} else {
> -		z3fold_page_lock(zhdr);
> -		bud = handle_to_buddy(handle);
> -
> -		switch (bud) {
> -		case FIRST:
> -			zhdr->first_chunks = 0;
> -			break;
> -		case MIDDLE:
> -			zhdr->middle_chunks = 0;
> -			zhdr->start_middle = 0;
> -			break;
> -		case LAST:
> -			zhdr->last_chunks = 0;
> -			break;
> -		default:
> -			pr_err("%s: unknown bud %d\n", __func__, bud);
> -			WARN_ON(1);
> -			z3fold_page_unlock(zhdr);
> -			return;
> +		/* if a headless page is under reclaim, just leave.
> +		 * NB: we use test_and_set_bit for a reason: if the bit
> +		 * has not been set before, we release this page
> +		 * immediately so we don't care about its value any more.
> +		 */
> +		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
> +			spin_lock(&pool->lock);
> +			list_del(&page->lru);
> +			spin_unlock(&pool->lock);
> +			free_z3fold_page(page);
> +			atomic64_dec(&pool->pages_nr);
>  		}
> +		return;
>  	}
>  
> -	if (bud == HEADLESS) {
> -		spin_lock(&pool->lock);
> -		list_del(&page->lru);
> -		spin_unlock(&pool->lock);
> -		free_z3fold_page(page);
> -		atomic64_dec(&pool->pages_nr);
> +	/* Non-headless case */
> +	z3fold_page_lock(zhdr);
> +	bud = handle_to_buddy(handle);
> +
> +	switch (bud) {
> +	case FIRST:
> +		zhdr->first_chunks = 0;
> +		break;
> +	case MIDDLE:
> +		zhdr->middle_chunks = 0;
> +		break;
> +	case LAST:
> +		zhdr->last_chunks = 0;
> +		break;
> +	default:
> +		pr_err("%s: unknown bud %d\n", __func__, bud);
> +		WARN_ON(1);
> +		z3fold_page_unlock(zhdr);
>  		return;
>  	}
>  
> @@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>  		atomic64_dec(&pool->pages_nr);
>  		return;
>  	}
> -	if (test_bit(UNDER_RECLAIM, &page->private)) {
> +	if (test_bit(PAGE_CLAIMED, &page->private)) {
>  		z3fold_page_unlock(zhdr);
>  		return;
>  	}
> @@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>  		}
>  		list_for_each_prev(pos, &pool->lru) {
>  			page = list_entry(pos, struct page, lru);
> +
> +			/* this bit could have been set by free, in which case
> +			 * we pass over to the next page in the pool.
> +			 */
> +			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
> +				continue;
> +
> +			zhdr = page_address(page);
>  			if (test_bit(PAGE_HEADLESS, &page->private))
> -				/* candidate found */
>  				break;
>  
> -			zhdr = page_address(page);
> -			if (!z3fold_page_trylock(zhdr))
> +			if (!z3fold_page_trylock(zhdr)) {
> +				zhdr = NULL;
>  				continue; /* can't evict at this point */
> +			}
>  			kref_get(&zhdr->refcount);
>  			list_del_init(&zhdr->buddy);
>  			zhdr->cpu = -1;
> -			set_bit(UNDER_RECLAIM, &page->private);
>  			break;
>  		}
>  
> +		if (!zhdr)
> +			break;
> +
>  		list_del_init(&page->lru);
>  		spin_unlock(&pool->lock);
>  
> @@ -898,6 +920,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>  		if (test_bit(PAGE_HEADLESS, &page->private)) {
>  			if (ret == 0) {
>  				free_z3fold_page(page);
> +				atomic64_dec(&pool->pages_nr);
>  				return 0;
>  			}
>  			spin_lock(&pool->lock);
> @@ -905,7 +928,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>  			spin_unlock(&pool->lock);
>  		} else {
>  			z3fold_page_lock(zhdr);
> -			clear_bit(UNDER_RECLAIM, &page->private);
> +			clear_bit(PAGE_CLAIMED, &page->private);
>  			if (kref_put(&zhdr->refcount,
>  					release_z3fold_page_locked)) {
>  				atomic64_dec(&pool->pages_nr);
> @@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>  		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>  		break;
>  	case LAST:
> -		addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
> +		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
>  		break;
>  	default:
>  		pr_err("unknown buddy id %d\n", buddy);
> 




More information about the kernel-team mailing list