<div dir="ltr">I sent out the same SRU email twice, sorry about that<div>please check the first one and ignore the second one</div><div><br></div><div>Thanks,</div><div>Gerald</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, May 18, 2021 at 11:00 AM Gerald Yang <<a href="mailto:gerald.yang@canonical.com">gerald.yang@canonical.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Ignat Korchagin <<a href="mailto:ignat@cloudflare.com" target="_blank">ignat@cloudflare.com</a>><br>
<br>
BugLink: <a href="https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1910976" rel="noreferrer" target="_blank">https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1910976</a><br>
<br>
This is a follow-up to [1], which detailed the latency problems
associated with dm-crypt's use of workqueues when processing IO.

The current dm-crypt implementation creates significant IO performance
overhead (at least for small IO block sizes), for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with
modern fast storage. I also did some digging into the dm-crypt git
history, and much of this async processing is no longer needed,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see the "Git archeology" section).

This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt not to offload crypto operations into kcryptd
workqueues. In addition, writes are not buffered and sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases where
crypto operations cannot happen inline (hard interrupt context, for
example the read path of some NVMe drivers), we offload the work to a
tasklet rather than a workqueue.

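As an illustration (not part of this patch), the flags are consumed as
optional dm table parameters, so a table line could look like the
sketch below; the device path and key file are placeholders:

  # <start> <len> crypt <cipher> <key> <iv_offset> <device> <offset> \
  #         <#opt_params> <opt_params>...
  # "2 no_read_workqueue no_write_workqueue" is the new optional-arg list
  dmsetup create crypt0 --table "0 $(blockdev --getsz /dev/nvme0n1) crypt \
      aes-xts-plain64 $(cat /tmp/hexkey) 0 /dev/nvme0n1 0 \
      2 no_read_workqueue no_write_workqueue"

Recent cryptsetup releases appear to expose the same toggles as
--perf-no_read_workqueue and --perf-no_write_workqueue.
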
These flags only ensure that no async BIO processing happens in the
dm-crypt module itself. It is worth noting that some Crypto API
implementations may offload encryption into their own workqueues,
which are independent of dm-crypt and its configuration. However,
upon enabling these new flags, dm-crypt will instruct the Crypto API
not to backlog crypto requests.

To give an idea of the performance gains for certain workloads,
consider the script, and the results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html

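For a quick local sanity check, a representative fio invocation against
the mapping sketched above might be (parameters are illustrative, not
the exact script from the linked thread):

  fio --name=lat-test --filename=/dev/mapper/crypt0 --ioengine=libaio \
      --direct=1 --bs=4k --rw=randrw --iodepth=1 --runtime=60 --time_based
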
[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/

Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
(cherry picked from commit 39d42fa96ba1b7d2544db3f8ed5da8fb0d5cb877)
Signed-off-by: Gerald Yang <gerald.yang@canonical.com>
---
 drivers/md/dm-crypt.c | 50 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d85648b9c247..91aa99e1d698 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -67,6 +67,7 @@ struct dm_crypt_io {
        u8 *integrity_metadata;
        bool integrity_metadata_from_pool;
        struct work_struct work;
+       struct tasklet_struct tasklet;

        struct convert_context ctx;

@@ -120,7 +121,8 @@ struct iv_tcw_private {
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+            DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE };

 enum cipher_flags {
        CRYPT_MODE_INTEGRITY_AEAD,      /* Use authenticated mode for cihper */
@@ -1211,7 +1213,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static blk_status_t crypt_convert(struct crypt_config *cc,
-                        struct convert_context *ctx)
+                        struct convert_context *ctx, bool atomic)
 {
        unsigned int tag_offset = 0;
        unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
@@ -1254,7 +1256,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
                        atomic_dec(&ctx->cc_pending);
                        ctx->cc_sector += sector_step;
                        tag_offset++;
-                       cond_resched();
+                       if (!atomic)
+                               cond_resched();
                        continue;
                /*
                 * There was a data integrity error.
@@ -1580,7 +1583,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)

        clone->bi_iter.bi_sector = cc->start + io->sector;

-       if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
+       if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
+           test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
                generic_make_request(clone);
                return;
        }
@@ -1629,7 +1633,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
        sector += bio_sectors(clone);

        crypt_inc_pending(io);
-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, &io->ctx,
+                         test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;
        crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
@@ -1659,7 +1664,8 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
        crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
                           io->sector);

-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, &io->ctx,
+                         test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;

@@ -1719,10 +1725,28 @@ static void kcryptd_crypt(struct work_struct *work)
                kcryptd_crypt_write_convert(io);
 }

+static void kcryptd_crypt_tasklet(unsigned long work)
+{
+       kcryptd_crypt((struct work_struct *)work);
+}
+
 static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;

+       if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
+           (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
+               if (in_irq()) {
+                       /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+                       tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
+                       tasklet_schedule(&io->tasklet);
+                       return;
+               }
+
+               kcryptd_crypt(&io->work);
+               return;
+       }
+
        INIT_WORK(&io->work, kcryptd_crypt);
        queue_work(cc->crypt_queue, &io->work);
 }
@@ -2483,7 +2507,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
        struct crypt_config *cc = ti->private;
        struct dm_arg_set as;
        static const struct dm_arg _args[] = {
-               {0, 6, "Invalid number of feature args"},
+               {0, 8, "Invalid number of feature args"},
        };
        unsigned int opt_params, val;
        const char *opt_string, *sval;
@@ -2513,6 +2537,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar

                else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
                        set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_read_workqueue"))
+                       set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_write_workqueue"))
+                       set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                else if (sscanf(opt_string, "integrity:%u:", &val) == 1) {
                        if (val == 0 || val > MAX_TAG_SIZE) {
                                ti->error = "Invalid integrity arguments";
@@ -2842,6 +2870,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                num_feature_args += !!ti->num_discard_bios;
                num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
                num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
                num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
                if (cc->on_disk_tag_size)
@@ -2854,6 +2884,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                                DMEMIT(" same_cpu_crypt");
                        if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
                                DMEMIT(" submit_from_crypt_cpus");
+                       if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_read_workqueue");
+                       if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_write_workqueue");
                        if (cc->on_disk_tag_size)
                                DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth);
                        if (cc->sector_size != (1 << SECTOR_SHIFT))
@@ -2966,7 +3000,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)

 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version = {1, 19, 0},
+       .version = {1, 22, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,
--
2.25.1