[PATCH][B][aws] UBUNTU SAUCE: mm: swap: improve swap readahead heuristic

Andrea Righi <andrea.righi@canonical.com>
Tue Dec 3 10:58:59 UTC 2019


BugLink: https://bugs.launchpad.net/bugs/1831940

Apply a more aggressive swapin readahead policy to improve swapoff
performance.

The idea is to start with no readahead (read only one page) and
linearly increase the number of readahead pages each time
swapin_readahead() is called, up to the maximum cluster size (defined
by vm.page-cluster), then go back to one page; this gives the disk
enough time to prefetch the requested pages and avoids re-requesting
them multiple times.
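
For illustration, here is a minimal stand-alone sketch of the ramp
(ramp_nr_pages() and the demo harness are made-up names for this
example; the real change is swapin_nr_pages() in the diff below):

  #include <stdio.h>

  static unsigned int prev_pages; /* window returned by the last call */

  static unsigned long ramp_nr_pages(unsigned long max_pages)
  {
          unsigned long pages = prev_pages + 1;

          if (pages > max_pages) {
                  prev_pages = 0;         /* wrap back to a single page */
                  pages = max_pages;
          } else {
                  prev_pages = pages;
          }
          return pages;
  }

  int main(void)
  {
          /* vm.page-cluster = 3 -> max window of 1 << 3 = 8 pages */
          for (int i = 0; i < 20; i++)
                  printf("%lu ", ramp_nr_pages(1UL << 3));
          printf("\n"); /* prints: 1 2 3 4 5 6 7 8 8 1 2 3 4 5 6 7 8 8 1 2 */
          return 0;
  }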

Also increase the default vm.page-cluster size to 8, which seems to
work better with this new heuristic.
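
(For scale: the readahead limit is 1 << page_cluster pages, so with
the usual 4 KiB pages the maximum window grows from 1 << 3 = 8 pages
(32 KiB) to 1 << 8 = 256 pages (1 MiB) per swapin_readahead() call.)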

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
---
 mm/swap.c       |  2 +-
 mm/swap_state.c | 60 ++++++++-----------------------------------------
 2 files changed, 10 insertions(+), 52 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index abc82e6c14d1..5603bc987ef0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1022,7 +1022,7 @@ void __init swap_setup(void)
 	if (megs < 16)
 		page_cluster = 2;
 	else
-		page_cluster = 3;
+		page_cluster = 8;
 	/*
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6dac8c6ee6d9..a2246bcebc77 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -472,62 +472,21 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return retpage;
 }
 
-static unsigned int __swapin_nr_pages(unsigned long prev_offset,
-				      unsigned long offset,
-				      int hits,
-				      int max_pages,
-				      int prev_win)
-{
-	unsigned int pages, last_ra;
-
-	/*
-	 * This heuristic has been found to work well on both sequential and
-	 * random loads, swapping to hard disk or to SSD: please don't ask
-	 * what the "+ 2" means, it just happens to work well, that's all.
-	 */
-	pages = hits + 2;
-	if (pages == 2) {
-		/*
-		 * We can have no readahead hits to judge by: but must not get
-		 * stuck here forever, so check for an adjacent offset instead
-		 * (and don't even bother to check whether swap type is same).
-		 */
-		if (offset != prev_offset + 1 && offset != prev_offset - 1)
-			pages = 1;
-	} else {
-		unsigned int roundup = 4;
-		while (roundup < pages)
-			roundup <<= 1;
-		pages = roundup;
-	}
-
-	if (pages > max_pages)
-		pages = max_pages;
-
-	/* Don't shrink readahead too fast */
-	last_ra = prev_win / 2;
-	if (pages < last_ra)
-		pages = last_ra;
-
-	return pages;
-}
-
 static unsigned long swapin_nr_pages(unsigned long offset)
 {
-	static unsigned long prev_offset;
-	unsigned int hits, pages, max_pages;
-	static atomic_t last_readahead_pages;
+	static unsigned int prev_pages;
+	unsigned long pages, max_pages;
 
 	max_pages = 1 << READ_ONCE(page_cluster);
 	if (max_pages <= 1)
 		return 1;
 
-	hits = atomic_xchg(&swapin_readahead_hits, 0);
-	pages = __swapin_nr_pages(prev_offset, offset, hits, max_pages,
-				  atomic_read(&last_readahead_pages));
-	if (!hits)
-		prev_offset = offset;
-	atomic_set(&last_readahead_pages, pages);
+	pages = READ_ONCE(prev_pages) + 1;
+	if (pages > max_pages) {
+		WRITE_ONCE(prev_pages, 0);
+		pages = max_pages;
+	} else
+		WRITE_ONCE(prev_pages, pages);
 
 	return pages;
 }
@@ -684,8 +643,7 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
 	prev_win = SWAP_RA_WIN(swap_ra_info);
 	hits = SWAP_RA_HITS(swap_ra_info);
-	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
-					       max_win, prev_win);
+	swap_ra->win = win = swapin_nr_pages(fpfn);
 	atomic_long_set(&vma->swap_readahead_info,
 			SWAP_RA_VAL(faddr, win, 0));
 
-- 
2.17.1