[PATCH][BIONIC] UBUNTU: SAUCE: Fix ARC hit rate (LP: #1755158)
Colin King
colin.king at canonical.com
Tue Mar 13 14:13:18 UTC 2018
From: Colin Ian King <colin.king at canonical.com>
BugLink: http://bugs.launchpad.net/bugs/1755158
Upstream ZFS fix, commit 0873bb6337452e3e028e40f5dad945b30deab185,
Fixes issue that can impact ARC hit rate especially with a small ARC
When the compressed ARC feature was added in commit d3c2ae1
the method of reference counting in the ARC was modified. As
part of this accounting change the arc_buf_add_ref() function
was removed entirely.
This would have be fine but the arc_buf_add_ref() function
served a second undocumented purpose of updating the ARC access
information when taking a hold on a dbuf. Without this logic
in place a cached dbuf would not migrate its associated
arc_buf_hdr_t to the MFU list. This would negatively impact
the ARC hit rate, particularly on systems with a small ARC.
Reviewed-by: Giuseppe Di Natale <dinatale2 at llnl.gov>
Reviewed-by: Tony Hutter <hutter2 at llnl.gov>
Reviewed-by: Tim Chase <tim at chase2k.com>
Reviewed by: George Wilson <george.wilson at delphix.com>
Reviewed-by: George Melikov <mail at gmelikov.ru>
Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
Closes #6171
Closes #6852
Closes #6989
(Backported from upstream ZFS commit 0873bb6337452e3e028e40f5dad945b30deab185)
Signed-off-by: Colin Ian King <colin.king at canonical.com>
---
zfs/META | 2 +-
zfs/include/sys/arc.h | 28 +++++++++++++-------------
zfs/module/zfs/arc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++-
zfs/module/zfs/dbuf.c | 4 +++-
4 files changed, 72 insertions(+), 16 deletions(-)
diff --git a/zfs/META b/zfs/META
index 18ca0f0..e3a052f 100644
--- a/zfs/META
+++ b/zfs/META
@@ -2,7 +2,7 @@ Meta: 1
Name: zfs
Branch: 1.0
Version: 0.7.5
-Release: 1ubuntu5
+Release: 1ubuntu6
Release-Tags: relext
License: CDDL
Author: OpenZFS on Linux
diff --git a/zfs/include/sys/arc.h b/zfs/include/sys/arc.h
index 66f37cf..e107155 100644
--- a/zfs/include/sys/arc.h
+++ b/zfs/include/sys/arc.h
@@ -95,35 +95,36 @@ typedef enum arc_flags
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
+ ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */
/*
* Private ARC flags. These flags are private ARC only flags that
* will show up in b_flags in the arc_hdr_buf_t. These flags should
* only be set by ARC code.
*/
- ARC_FLAG_IN_HASH_TABLE = 1 << 6, /* buffer is hashed */
- ARC_FLAG_IO_IN_PROGRESS = 1 << 7, /* I/O in progress */
- ARC_FLAG_IO_ERROR = 1 << 8, /* I/O failed for buf */
- ARC_FLAG_INDIRECT = 1 << 9, /* indirect block */
+ ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */
+ ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */
+ ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */
+ ARC_FLAG_INDIRECT = 1 << 10, /* indirect block */
/* Indicates that block was read with ASYNC priority. */
- ARC_FLAG_PRIO_ASYNC_READ = 1 << 10,
- ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
- ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
- ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
+ ARC_FLAG_PRIO_ASYNC_READ = 1 << 11,
+ ARC_FLAG_L2_WRITING = 1 << 12, /* write in progress */
+ ARC_FLAG_L2_EVICTED = 1 << 13, /* evicted during I/O */
+ ARC_FLAG_L2_WRITE_HEAD = 1 << 14, /* head of write list */
/* indicates that the buffer contains metadata (otherwise, data) */
- ARC_FLAG_BUFC_METADATA = 1 << 14,
+ ARC_FLAG_BUFC_METADATA = 1 << 15,
/* Flags specifying whether optional hdr struct fields are defined */
- ARC_FLAG_HAS_L1HDR = 1 << 15,
- ARC_FLAG_HAS_L2HDR = 1 << 16,
+ ARC_FLAG_HAS_L1HDR = 1 << 16,
+ ARC_FLAG_HAS_L2HDR = 1 << 17,
/*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr.
*/
- ARC_FLAG_COMPRESSED_ARC = 1 << 17,
- ARC_FLAG_SHARED_DATA = 1 << 18,
+ ARC_FLAG_COMPRESSED_ARC = 1 << 18,
+ ARC_FLAG_SHARED_DATA = 1 << 19,
/*
* The arc buffer's compression mode is stored in the top 7 bits of the
@@ -221,6 +222,7 @@ void arc_buf_destroy(arc_buf_t *buf, void *tag);
void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
uint64_t arc_buf_size(arc_buf_t *buf);
uint64_t arc_buf_lsize(arc_buf_t *buf);
+void arc_buf_access(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
diff --git a/zfs/module/zfs/arc.c b/zfs/module/zfs/arc.c
index 2b0a78d..d6ebb4f 100644
--- a/zfs/module/zfs/arc.c
+++ b/zfs/module/zfs/arc.c
@@ -430,8 +430,13 @@ typedef struct arc_stats {
*/
kstat_named_t arcstat_mutex_miss;
/*
+ * Number of buffers skipped when updating the access state due to the
+ * header having already been released after acquiring the hash lock.
+ */
+ kstat_named_t arcstat_access_skip;
+ /*
* Number of buffers skipped because they have I/O in progress, are
- * indrect prefetch buffers that have not lived long enough, or are
+ * indirect prefetch buffers that have not lived long enough, or are
* not from the spa we're trying to evict from.
*/
kstat_named_t arcstat_evict_skip;
@@ -667,6 +672,7 @@ static arc_stats_t arc_stats = {
{ "mfu_ghost_hits", KSTAT_DATA_UINT64 },
{ "deleted", KSTAT_DATA_UINT64 },
{ "mutex_miss", KSTAT_DATA_UINT64 },
+ { "access_skip", KSTAT_DATA_UINT64 },
{ "evict_skip", KSTAT_DATA_UINT64 },
{ "evict_not_enough", KSTAT_DATA_UINT64 },
{ "evict_l2_cached", KSTAT_DATA_UINT64 },
@@ -840,6 +846,8 @@ static taskq_t *arc_prune_taskq;
#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS)
#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR)
#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
+#define HDR_PRESCIENT_PREFETCH(hdr) \
+ ((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH)
#define HDR_COMPRESSION_ENABLED(hdr) \
((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
@@ -4926,6 +4934,50 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
}
}
+/*
+ * This routine is called by dbuf_hold() to update the arc_access() state
+ * which otherwise would be skipped for entries in the dbuf cache.
+ */
+void
+arc_buf_access(arc_buf_t *buf)
+{
+ mutex_enter(&buf->b_evict_lock);
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ /*
+ * Avoid taking the hash_lock when possible as an optimization.
+ * The header must be checked again under the hash_lock in order
+ * to handle the case where it is concurrently being released.
+ */
+ if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
+ mutex_exit(&buf->b_evict_lock);
+ return;
+ }
+
+ kmutex_t *hash_lock = HDR_LOCK(hdr);
+ mutex_enter(hash_lock);
+
+ if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
+ mutex_exit(hash_lock);
+ mutex_exit(&buf->b_evict_lock);
+ ARCSTAT_BUMP(arcstat_access_skip);
+ return;
+ }
+
+ mutex_exit(&buf->b_evict_lock);
+
+ ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+ hdr->b_l1hdr.b_state == arc_mfu);
+
+ DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
+ arc_access(hdr, hash_lock);
+ mutex_exit(hash_lock);
+
+ ARCSTAT_BUMP(arcstat_hits);
+ ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr) && !HDR_PRESCIENT_PREFETCH(hdr),
+ demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
+}
+
/* a generic arc_done_func_t which you can use */
/* ARGSUSED */
void
diff --git a/zfs/module/zfs/dbuf.c b/zfs/module/zfs/dbuf.c
index 60f52d2..4ee121f 100644
--- a/zfs/module/zfs/dbuf.c
+++ b/zfs/module/zfs/dbuf.c
@@ -2719,8 +2719,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
return (SET_ERROR(ENOENT));
}
- if (dh->dh_db->db_buf != NULL)
+ if (dh->dh_db->db_buf != NULL) {
+ arc_buf_access(dh->dh_db->db_buf);
ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
+ }
ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
--
2.7.4
More information about the kernel-team
mailing list