ACK/cmnt: [PATCH][B][aws] UBUNTU SAUCE: mm: swap: improve swap readahead heuristic
Kamal Mostafa
kamal at canonical.com
Wed Dec 11 20:10:55 UTC 2019
LGTM. As we've discussed, I suggest also submitting this upstream, maybe
enabled by a heuristic-selector switch.
Acked-by: Kamal Mostafa <kamal at canonical.com>
-Kamal
On Tue, Dec 03, 2019 at 11:58:59AM +0100, Andrea Righi wrote:
> BugLink: https://bugs.launchpad.net/bugs/1831940
>
> Apply a more aggressive swapin readahead policy to improve swapoff
> performance.
>
> The idea is to start with no readahead (read only one page) and linearly
> increment the amount of readahead pages each time swapin_readahead() is
> called, up to the maximum cluster size (defined by vm.page-cluster),
> then go back to one page to give the disk enough time to prefetch the
> requested pages and avoid re-requesting them multiple times.
>
> Also increase the default vm.page-cluster size to 8 (which seems to work
> better with this new heuristic).
>
> Signed-off-by: Andrea Righi <andrea.righi at canonical.com>
> ---
> mm/swap.c | 2 +-
> mm/swap_state.c | 60 ++++++++-----------------------------------------
> 2 files changed, 10 insertions(+), 52 deletions(-)
>
> diff --git a/mm/swap.c b/mm/swap.c
> index abc82e6c14d1..5603bc987ef0 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -1022,7 +1022,7 @@ void __init swap_setup(void)
> if (megs < 16)
> page_cluster = 2;
> else
> - page_cluster = 3;
> + page_cluster = 8;
> /*
> * Right now other parts of the system means that we
> * _really_ don't want to cluster much more
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 6dac8c6ee6d9..a2246bcebc77 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -472,62 +472,21 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> return retpage;
> }
>
> -static unsigned int __swapin_nr_pages(unsigned long prev_offset,
> - unsigned long offset,
> - int hits,
> - int max_pages,
> - int prev_win)
> -{
> - unsigned int pages, last_ra;
> -
> - /*
> - * This heuristic has been found to work well on both sequential and
> - * random loads, swapping to hard disk or to SSD: please don't ask
> - * what the "+ 2" means, it just happens to work well, that's all.
> - */
> - pages = hits + 2;
> - if (pages == 2) {
> - /*
> - * We can have no readahead hits to judge by: but must not get
> - * stuck here forever, so check for an adjacent offset instead
> - * (and don't even bother to check whether swap type is same).
> - */
> - if (offset != prev_offset + 1 && offset != prev_offset - 1)
> - pages = 1;
> - } else {
> - unsigned int roundup = 4;
> - while (roundup < pages)
> - roundup <<= 1;
> - pages = roundup;
> - }
> -
> - if (pages > max_pages)
> - pages = max_pages;
> -
> - /* Don't shrink readahead too fast */
> - last_ra = prev_win / 2;
> - if (pages < last_ra)
> - pages = last_ra;
> -
> - return pages;
> -}
> -
> static unsigned long swapin_nr_pages(unsigned long offset)
> {
> - static unsigned long prev_offset;
> - unsigned int hits, pages, max_pages;
> - static atomic_t last_readahead_pages;
> + static unsigned int prev_pages;
> + unsigned long pages, max_pages;
>
> max_pages = 1 << READ_ONCE(page_cluster);
> if (max_pages <= 1)
> return 1;
>
> - hits = atomic_xchg(&swapin_readahead_hits, 0);
> - pages = __swapin_nr_pages(prev_offset, offset, hits, max_pages,
> - atomic_read(&last_readahead_pages));
> - if (!hits)
> - prev_offset = offset;
> - atomic_set(&last_readahead_pages, pages);
> + pages = READ_ONCE(prev_pages) + 1;
> + if (pages > max_pages) {
> + WRITE_ONCE(prev_pages, 0);
> + pages = max_pages;
> + } else
> + WRITE_ONCE(prev_pages, pages);
>
> return pages;
> }
> @@ -684,8 +643,7 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> prev_win = SWAP_RA_WIN(swap_ra_info);
> hits = SWAP_RA_HITS(swap_ra_info);
> - swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> - max_win, prev_win);
> + swap_ra->win = win = swapin_nr_pages(fpfn);
> atomic_long_set(&vma->swap_readahead_info,
> SWAP_RA_VAL(faddr, win, 0));
>
> --
> 2.17.1
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
More information about the kernel-team
mailing list